Implementación del scraping ya funcional
This commit is contained in:
		
							parent
							
								
									332d0465b6
								
							
						
					
					
						commit
						b44271753d
					
				|  | @ -13,6 +13,7 @@ from app.widgets.TicTacToeTab import TicTacToeTab | ||||||
| from app.widgets.TodoTab import TodoTab | from app.widgets.TodoTab import TodoTab | ||||||
| from app.widgets.UsageLabels import CPULabel, RAMLabel, BatteryLabel, NetworkLabel | from app.widgets.UsageLabels import CPULabel, RAMLabel, BatteryLabel, NetworkLabel | ||||||
| from app.widgets.WeatherTab import WeatherTab | from app.widgets.WeatherTab import WeatherTab | ||||||
|  | from app.widgets.WebScrapingTab import WebScrapingTab | ||||||
| 
 | 
 | ||||||
| stop_event = threading.Event() | stop_event = threading.Event() | ||||||
| 
 | 
 | ||||||
|  | @ -133,6 +134,11 @@ tic_tac_toe_tab =   TicTacToeTab(notebook, stop_event=stop_event) | ||||||
| tic_tac_toe_tab.pack(fill="both", expand=True) | tic_tac_toe_tab.pack(fill="both", expand=True) | ||||||
| notebook.add(tic_tac_toe_tab, text="Tic Tac Toe") | notebook.add(tic_tac_toe_tab, text="Tic Tac Toe") | ||||||
| 
 | 
 | ||||||
|  | # Add the WebScrapingTab to the notebook | ||||||
|  | web_scraping_tab = WebScrapingTab(notebook, stop_event=stop_event) | ||||||
|  | web_scraping_tab.pack(fill="both", expand=True) | ||||||
|  | notebook.add(web_scraping_tab, text="Web Scraping") | ||||||
|  | 
 | ||||||
| # Create the chat and music player frames within the right frame | # Create the chat and music player frames within the right frame | ||||||
| frame_chat = tk.Frame(frame_right, bg="lightgreen") | frame_chat = tk.Frame(frame_right, bg="lightgreen") | ||||||
| frame_music_player = tk.Frame(frame_right) | frame_music_player = tk.Frame(frame_right) | ||||||
|  |  | ||||||
|  | @ -0,0 +1,105 @@ | ||||||
|  | import tkinter as tk | ||||||
|  | from tkinter import Frame, Button, Label, Entry, Listbox, StringVar, messagebox | ||||||
|  | import mysql.connector | ||||||
|  | import requests | ||||||
|  | from bs4 import BeautifulSoup | ||||||
|  | from app.widgets.abc import ThreadedTab | ||||||
|  | 
 | ||||||
class WebScrapingTab(ThreadedTab):
    """Tab that scrapes title/link pairs from a user-supplied URL and
    persists them into a MySQL table (``scraped_data``).

    The widget offers a URL entry, a "Scrape" button (fetch + parse + save)
    and a "View Data" button (reload rows from the database), displaying
    results in a listbox.
    """

    def __init__(self, root: Frame | tk.Tk, stop_event, **kwargs):
        """Set up widget state, run the base-class constructor (which calls
        ``build()``), then open the database connection.

        Args:
            root: Parent container (frame or top-level window).
            stop_event: Event used by ThreadedTab to stop background work.
            **kwargs: Forwarded to the ThreadedTab constructor.
        """
        # Attributes must exist before super().__init__ because the base
        # class invokes build(), which reads self.url.
        self.url = StringVar()
        self.data = []  # list of (title, link) tuples from the last scrape
        self.conn = None  # connection is established after base setup
        super().__init__(root, stop_event, **kwargs)
        self.conn = self.create_database()  # connect and ensure schema exists

    def build(self):
        """Create the tab's widgets: URL entry, action buttons, results list."""
        # Main frame
        self.scraping_frame = Frame(self)
        self.scraping_frame.pack(fill="both", expand=True)

        # Input field for URL
        Label(self.scraping_frame, text="Enter URL:", font=("Arial", 12)).pack(pady=5)
        Entry(self.scraping_frame, textvariable=self.url, font=("Arial", 12), width=50).pack(pady=5)

        # Buttons for actions
        Button(self.scraping_frame, text="Scrape", command=self.scrape_website).pack(pady=5)
        Button(self.scraping_frame, text="View Data", command=self.view_data).pack(pady=5)

        # Listbox to display scraped data
        self.data_listbox = Listbox(self.scraping_frame, font=("Arial", 10), width=80, height=20)
        self.data_listbox.pack(pady=10)

    def create_database(self):
        """Connect to MySQL and create the ``scraped_data`` table if missing.

        Returns:
            An open ``mysql.connector`` connection.

        Raises:
            mysql.connector.Error: If the server is unreachable or
                authentication fails.
        """
        # SECURITY: credentials are hard-coded; move them to environment
        # variables or a config file before deploying.
        conn = mysql.connector.connect(
            host="127.0.0.1",  # fixed: trailing space in the host string broke name resolution
            user="santipy",
            password="1234",
            database="scraping_db",
        )
        cursor = conn.cursor()
        try:
            # Create the table if it does not exist yet.
            cursor.execute("""
                       CREATE TABLE IF NOT EXISTS scraped_data (
                           id INT AUTO_INCREMENT PRIMARY KEY,
                           title VARCHAR(255),
                           link TEXT
                       )
                   """)
            conn.commit()
        finally:
            cursor.close()  # fixed: cursor was previously leaked
        return conn

    def save_to_database(self):
        """Bulk-insert the (title, link) pairs in ``self.data``."""
        cursor = self.conn.cursor()
        try:
            query = "INSERT INTO scraped_data (title, link) VALUES (%s, %s)"
            cursor.executemany(query, self.data)
            self.conn.commit()
        finally:
            cursor.close()  # fixed: cursor was previously leaked

    def scrape_website(self):
        """Fetch the entered URL, extract ``h2 a`` anchors, save and display."""
        url = self.url.get()
        if not url:
            messagebox.showwarning("Warning", "Please enter a URL.")
            return

        try:
            # Timeout added so a dead server cannot freeze the Tk main loop.
            response = requests.get(url, timeout=10)
            response.raise_for_status()
        except requests.RequestException as e:
            messagebox.showerror("Error", f"Failed to fetch URL: {e}")
            return

        soup = BeautifulSoup(response.text, "html.parser")
        items = soup.select("h2 a")  # Modify selector based on website structure

        self.data = [(item.get_text(strip=True), item.get("href")) for item in items]

        if self.data:
            self.save_to_database()
            messagebox.showinfo("Success", f"Scraped {len(self.data)} items and saved to database.")
        else:
            messagebox.showinfo("No Data", "No data found on the page.")

        self.update_listbox()

    def update_listbox(self):
        """Refresh the listbox from the in-memory ``self.data``."""
        self.data_listbox.delete(0, "end")
        for title, link in self.data:
            self.data_listbox.insert("end", f"Title: {title} | Link: {link}")

    def view_data(self):
        """Load all saved rows from the database into the listbox."""
        cursor = self.conn.cursor()
        try:
            cursor.execute("SELECT title, link FROM scraped_data")
            rows = cursor.fetchall()
        finally:
            cursor.close()  # fixed: cursor was previously leaked

        self.data_listbox.delete(0, "end")
        for title, link in rows:
            self.data_listbox.insert("end", f"Title: {title} | Link: {link}")

    def task(self):
        """Background hook required by ThreadedTab; no periodic work needed."""
        pass
|  | @ -1,4 +1,5 @@ | ||||||
| from .ClockLabel import ClockLabel | from .ClockLabel import ClockLabel | ||||||
| from .UsageLabels import CPULabel, RAMLabel | from .UsageLabels import CPULabel, RAMLabel | ||||||
|  | from .WebScrapingTab import WebScrapingTab | ||||||
| 
 | 
 | ||||||
| __all__ = ['ClockLabel', 'CPULabel', 'RAMLabel'] | __all__ = ['ClockLabel', 'CPULabel', 'RAMLabel', 'WebScrapingTab'] | ||||||
|  |  | ||||||
										
											Binary file not shown.
										
									
								
							
										
											Binary file not shown.
										
									
								
							
		Loading…
	
		Reference in New Issue