From 647cef869a31128c04914f7007460878aeea5eea Mon Sep 17 00:00:00 2001
From: DennisEckerskorn
Date: Tue, 10 Dec 2024 20:42:40 +0100
Subject: [PATCH] Tried fixing the stop issue with threads, but it still
 doesn't want to stop; start scraping works fine now

---
 src/main.py                                 |   4 +-
 .../__pycache__/scrapper.cpython-312.pyc    | Bin 7648 -> 7439 bytes
 .../system_monitor.cpython-312.pyc          | Bin 5680 -> 5604 bytes
 .../threads_manager.cpython-312.pyc         | Bin 9372 -> 9215 bytes
 src/services/scrapper.py                    | 106 ++++++++----------
 src/services/system_monitor.py              |   2 +-
 src/services/threads_manager.py             |   2 +-
 7 files changed, 54 insertions(+), 60 deletions(-)

diff --git a/src/main.py b/src/main.py
index 6ccb86b..68495be 100644
--- a/src/main.py
+++ b/src/main.py
@@ -8,4 +8,6 @@ def main():
         print(f"Error al iniciar la aplicación: {e}")
 
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
+
+    #self.tasks["scrapper"].start(self.scrapper.start_scraping)
\ No newline at end of file
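[Review note] The three __pycache__ deltas that follow track compiled bytecode in the
repository; the interpreter regenerates these files on every run, so they are normally
kept out of version control. A typical exclusion, assuming a .gitignore at the
repository root:

    __pycache__/
    *.pyc

With that in place, `git rm -r --cached src/services/__pycache__` would drop the
already-tracked copies from the index without deleting them locally.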
diff --git a/src/services/__pycache__/scrapper.cpython-312.pyc b/src/services/__pycache__/scrapper.cpython-312.pyc
index 314d4fe64f15aa271abd99036490ad5bc8711218..38c4ff1051da262f8726dbe834a76d331dcfebf7 100644
GIT binary patch
[binary delta data omitted]

diff --git a/src/services/__pycache__/system_monitor.cpython-312.pyc b/src/services/__pycache__/system_monitor.cpython-312.pyc
index b9251dcd8a05aa60e7dfd6c3adb9d69205e40d0d..fd19bbf154ae7a0b5ee9c2f4135bc250a309b300 100644
GIT binary patch
[binary delta data omitted]

diff --git a/src/services/__pycache__/threads_manager.cpython-312.pyc b/src/services/__pycache__/threads_manager.cpython-312.pyc
index dd282d2c8b47a7378e4e3199d7202ed0595251ca..c7e6f2f99ccde4c51dd4de6eba60847cf53e0595 100644
GIT binary patch
[binary delta data omitted]

diff --git a/src/services/scrapper.py b/src/services/scrapper.py
index 3c477de..22a1486 100644
--- a/src/services/scrapper.py
+++ b/src/services/scrapper.py
@@ -33,6 +33,10 @@ class Scrapper:
     def start_scraping(self):
         """Inicia el proceso de scraping"""
+        if self.running:
+            print("El scrapping ya está en ejecución.")
+            return
+
         self.running = True
         url = self.get_url_from_ui()
         if url:
@@ -42,14 +46,15 @@ class Scrapper:
         else:
             print("No se proporcionó una URL válida.")
 
-    def stop_scraping(self):
-        """Detiene el proceso de scraping"""
-        self.running = False
-        print("Scrapping detenido. Proceso finalizado.")
+    def stop_scraping(self):
+        """Detiene el proceso de scraping"""
+        print("Deteniendo el proceso de scraping...")
+        # Detener las tareas
+        self.scraping_task.stop_thread()
+        self.db_task.stop()
 
-        #Vaciar la cola para detener el hilo de inserción
-        while not self.link_queue.empty():
-            self.link_queue.get()
+        # Inserta un sentinel (None) en la cola para detener el hilo de inserción
+        self.link_queue.put(None)
 
         # Actualiza la pestaña "Scrapping" con un mensaje
         tab = self.ui_instance.tabs["Scrapping"]
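[Review note] The sentinel added in stop_scraping above (self.link_queue.put(None)) is
the standard poison-pill shutdown for a queue consumer thread. A minimal, runnable
sketch of the pattern, independent of this codebase (queue contents and names are
illustrative):

    import queue
    import threading

    SENTINEL = None  # poison pill: tells the consumer to exit

    def consumer(q):
        while True:
            item = q.get()        # blocks until an item arrives
            if item is SENTINEL:  # sentinel reached: shut down cleanly
                break
            parent_url, link = item
            print(f"would insert {link} (parent: {parent_url})")

    q = queue.Queue()
    t = threading.Thread(target=consumer, args=(q,), daemon=True)
    t.start()
    q.put(("https://example.com", "https://example.com/about"))
    q.put(SENTINEL)  # request shutdown
    t.join()         # returns once the consumer has seen the sentinel

One caveat in the consumer below: get(timeout=1) raises queue.Empty on timeout, and the
blanket `except Exception` logs that as a database error; catching queue.Empty
separately would keep the log accurate.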
@@ -58,36 +63,39 @@ class Scrapper:
         text_widget.configure(state="normal")
         text_widget.insert("end", "Scrapping finalizado.\n")
         text_widget.see("end")
-        text_widget.configure(state="disabled")
+        text_widget.configure(state="disabled")
+        print("Scrapping detenido. Proceso finalizado.")
 
-    def scrape_page(self, url):
-        """Scrapea una web y busca los enlaces"""
-        if not self.running or url in self.visited_links:
-            return
-
-        with self.lock:
-            self.visited_links.add(url)
+    def scrape_page(self, url):
+        """Scrapea una web y busca los enlaces"""
+        if not self.running or url in self.visited_links:
+            return
 
-        try:
-            response = requests.get(url, timeout=10)
-            if response.status_code == 200:
-                soup = BeautifulSoup(response.text, "html.parser")
-                links = [urljoin(url, a.get("href")) for a in soup.find_all("a", href=True)]
-                self.update_ui(url, links)
+        with self.lock:
+            self.visited_links.add(url)
 
-            for link in links:
-                if not self.running:
-                    break
-                self.link_queue.put((url, link))
+        try:
+            response = requests.get(url, timeout=10)
+            if response.status_code == 200:
+                soup = BeautifulSoup(response.text, "html.parser")
+                links = [urljoin(url, a.get("href")) for a in soup.find_all("a", href=True)]
+                self.update_ui(url, links)
+
+                for link in links:
+                    if not self.running:
+                        break
+                    self.link_queue.put((url, link))
+
+                # Procesar los enlaces de forma secuencial en lugar de crear nuevos hilos
+                for link in links:
+                    if not self.running:
+                        break
+                    self.scrape_page(link)
+            else:
+                print(f"Error al acceder a {url}: {response.status_code}")
+        except Exception as e:
+            print(f"Error al scrapear {url}: {e}")
 
-            for link in links:
-                if not self.running:
-                    break
-                threading.Thread(target=self.scrape_page, args=(link,), daemon=True).start()
-            else:
-                print(f"Error al acceder a {url}: {response.status_code}")
-        except Exception as e:
-            print(f"Error al scrapear {url}: {e}")
 
     def update_ui(self, url, links):
         """Actualiza la pestaña 'Scrapping' con los enlaces encontrados"""
@@ -102,15 +110,16 @@ class Scrapper:
         text_widget.configure(state="disabled")
 
-
     def insert_links_to_db(self):
         """Inserta los enlaces en la base de datos desde la cola"""
-        while self.running or not self.link_queue.empty():
+        while True:
             try:
-                # Obtener un enlace de la cola
-                if not self.running and self.link_queue.empty():
-                    break
-                parent_url, link = self.link_queue.get(timeout=1)  # Espera 1 segundo si la cola está vacía
+                # Obtener un enlace de la cola
+                item = self.link_queue.get(timeout=1)
+                if item is None:  # Si encuentra el sentinel, detiene el hilo
+                    break
+
+                parent_url, link = item
                 connection = mysql.connector.connect(**self.db_config)
                 cursor = connection.cursor()
                 cursor.execute("CREATE TABLE IF NOT EXISTS links (id INT AUTO_INCREMENT PRIMARY KEY, url TEXT, parent_url TEXT)")
@@ -120,7 +129,7 @@ class Scrapper:
                 connection.close()
                 print(f"Enlace guardado: {link} (parent: {parent_url})")
             except Exception as e:
-                print(f"Error al guardar en la base de datos: {e}")
+                print(f"Error al guardar en la base de datos: {e}")
 
 
     def get_url_from_ui(self):
         except AttributeError:
             print("No se pudo obtener la URL desde la interfaz")
             return None
-"""
-    def save_links_to_db(self, url, links):
-        Guarda los enlaces en la base de datos
-        try:
-            connection = mysql.connector.connect(**self.db_config)
-            cursor = connection.cursor()
-            cursor.execute("CREATE TABLE IF NOT EXISTS links (id INT AUTO_INCREMENT PRIMARY KEY, url TEXT, parent_url TEXT)")
-
-            for link in links:
-                print(f"Guardando enlace: {link} (parent: {url})")  # Verifica los datos
-                cursor.execute("INSERT INTO links (url, parent_url) VALUES (%s, %s)", (link, url))
-            connection.commit()
-            cursor.close()
-            connection.close()
-        except Exception as e:
-            print(f"Error al gaurdar en la base de datos: {e}")
-"""
\ No newline at end of file
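[Review note] The rewritten stop_scraping never sets self.running = False (the removed
version did), so the `if not self.running` checks in scrape_page keep passing and the
crawl continues — which matches the stop problem described in the subject line. A
hedged sketch of one possible fix, reusing only attributes visible in this diff
(self.running, self.link_queue); not the author's method:

    def stop_scraping(self):
        """Detiene el proceso de scraping"""
        self.running = False       # scrape_page's running checks now observe the stop
        self.link_queue.put(None)  # sentinel so the DB consumer thread exits too
        print("Scrapping detenido. Proceso finalizado.")

Since the sequential scrape_page recursion runs on whatever thread called
start_scraping, the "Scrapping finalizado." UI update would still only appear once that
call stack unwinds.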
diff --git a/src/services/system_monitor.py b/src/services/system_monitor.py
index e0f86ae..0e64cc8 100644
--- a/src/services/system_monitor.py
+++ b/src/services/system_monitor.py
@@ -92,5 +92,5 @@ class SystemMonitor:
 
         # Convierte a KB/s
         total_kb = (sent_bytes + recv_bytes) / 1024
-        print(f"Network Usage: {total_kb} KB/s")
+        #print(f"Network Usage: {total_kb} KB/s")
         return total_kb
\ No newline at end of file
diff --git a/src/services/threads_manager.py b/src/services/threads_manager.py
index 1e0fed9..3daed0c 100644
--- a/src/services/threads_manager.py
+++ b/src/services/threads_manager.py
@@ -38,7 +38,7 @@ class ThreadsManager:
         self.tasks["time"].start(self.update_time)
         self.tasks["temperature"].start(self.update_temperature)
         self.tasks["emails"].start(self.update_emails)
-        self.tasks["scrapper"].start(self.scrapper.start_scraping)
+
 
         if self.system_monitor:
             for metric in self.system_monitor.metrics.keys():
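[Review note] With the automatic self.tasks["scrapper"].start(...) removed from
ThreadsManager, the scraper presumably gets started on demand from the UI instead
(the subject line says start scraping now works). A minimal sketch of such a handler,
assuming a Scrapper instance with the re-entrancy guard added in this patch; the
function name and wiring are illustrative:

    import threading

    def on_start_scraping_clicked(scrapper):
        # Run the blocking crawl off the UI thread; the self.running guard in
        # start_scraping makes a second click a no-op while a crawl is active.
        threading.Thread(target=scrapper.start_scraping, daemon=True).start()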