mirror of https://github.com/JavMB/ejercicios
playground
This commit is contained in:
parent 6850b8d91e
commit e822a36255
@@ -0,0 +1,13 @@
- **Basic exercises**: includes problems on syntax, control structures, collection manipulation, and functions.
- **Library exploration**: examples and exercises with popular libraries such as:
  - **Pandas**: data manipulation and analysis.
  - **Matplotlib**: data visualization.
  - **FastAPI**: building fast, efficient APIs.
  - **Functools**: working with higher-order functions such as `map`, `filter`, and `reduce` (see the sketch after this list).
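A minimal sketch of the functools idea mentioned above (the names and data here are illustrative, not taken from the repository; note that `map` and `filter` are builtins, while `reduce` lives in `functools`):

```python
from functools import reduce

names = ["david", "alberto", "john"]

# map and filter are builtins; reduce must be imported from functools
upper = map(str.upper, names)
with_a = filter(lambda name: "A" in name, upper)

# reduce folds the remaining names down to the longest one
longest = reduce(lambda a, b: a if len(a) >= len(b) else b, with_a)

print(longest)  # ALBERTO
```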
---
@@ -0,0 +1,11 @@
class Person:

    def __init__(self, name, age):
        self.name = name
        self.age = age


if __name__ == '__main__':
    person = Person("David", 25)

    print(f"My name is {person.name}")
@@ -0,0 +1,12 @@
if __name__ == '__main__':

    x = 5
    y = 2

    if x % 2 == 0:
        print("Even")
    else:
        print("Odd")

    result = "Even" if y % 2 == 0 else "Odd"
    print(result)
@@ -0,0 +1,112 @@
import queue
from array import array
from collections import deque, Counter


# Ordered, mutable collections; can mix element types
def lists():
    data = [1, 2, 3, 4, 5, "David"]

    data.remove(5)
    print(f"List with element removed: {data}")

    data.append(5)

    for datum in data:
        print(f"Number {datum}")

    print(f"List sliced: {data[1:3]}")
    print(f"List reversed: {data[::-1]}")


# Ordered, immutable collections; can mix element types
def tuples():
    data = (24, "David", 20, "Alberto")
    print(f"Tuples {data}")

    print(f"First element: {data[0]}")


# Unordered collections of unique, hashable (immutable) elements
def sets():
    data = {1, 2, 3, 4, 5, 6}
    data2 = {1, 2, 3, 4, 5}
    print(f"Set of data: {data}")

    intersection = data.intersection(data2)
    union = data.union(data2)
    difference = data.difference(data2)

    print(f"Intersection {intersection}")
    print(f"Union {union}")
    print(f"Difference {difference}")


# Collections of key-value pairs
def dictionaries():
    data = {"David": 25, "Alberto": 20}
    data["John"] = 80
    data.pop("Alberto")
    print(f"Data {data}")
    print(f"Data for David {data['David']}")


def strings():
    data = "Hello world!"

    print(f"Data {data}")

    replace = data.replace("world", "David")

    print(f"Data replaced {replace}")


def collections():
    # deque supports O(1) appends and pops at both ends
    my_deque = deque([1, 2, 3])
    my_deque.popleft()

    my_counter = Counter(['apple', 'banana', 'apple'])
    most_common = my_counter.most_common()

    print(f"Data queue {my_deque}")
    print(f"Data counter items {my_counter}")
    print(f"Data counter most common {most_common}")


def arrays():
    # array('i', ...) stores C ints compactly; all elements must share one type
    my_array = array('i', [1, 2, 3, 4])
    print(f"Data {my_array}")
    print(f"First data {my_array[0]}")


def queues():
    # queue.Queue is a thread-safe FIFO queue
    my_queue = queue.Queue()
    my_queue.put(1)
    my_queue.put(2)

    print(f"First Data {my_queue.get()}")


def manipulate_list():
    names = ["david", "alberto", "john"]

    # List-comprehension equivalents of map and filter
    mapped = [name.upper() for name in names]

    filtered = [name for name in mapped if name.startswith("D")]

    print(filtered)


def manipulate_list2():
    names = ["david", "alberto", "john"]

    mapped = list(map(lambda name: name.upper(), names))

    filtered = list(filter(lambda name: name.startswith("D"), mapped))

    print(filtered)


if __name__ == '__main__':
    # lists()
    manipulate_list()
@@ -0,0 +1,52 @@
from datetime import datetime, timedelta

import pytz


def iso_format(date_string: str) -> datetime:
    return datetime.fromisoformat(date_string)


def str_format(date_string: str, date_format: str) -> datetime:
    return datetime.strptime(date_string, date_format)


def apply_format(date_time: datetime, date_format: str) -> str:
    return date_time.strftime(date_format)


def add_days(date_time, days_to_add):
    return date_time + timedelta(days=days_to_add)


def difference_days(date_time_one, date_time_two):
    return date_time_two - date_time_one


def now_in_time_zone(time_zone):
    zone = pytz.timezone(time_zone)
    return datetime.now(zone)


if __name__ == "__main__":
    date = datetime(2025, 1, 5)
    print(f"The date created is {date}")

    # renamed from iso_format to avoid shadowing the function above
    iso_date = iso_format("2024-12-12")
    print(f"The date in ISO format is {iso_date}")

    now = datetime.now()
    print(f"Today is {now}")

    string_format = str_format("12/12/2024", "%d/%m/%Y")
    print(f"The date parsed from string is {string_format}")

    formatted = now.strftime("%d/%m/%Y %H:%M:%S")
    print(f"The date formatted is {formatted}")

    tomorrow = add_days(now, 1)
    print(f"Today + 1 day is {tomorrow}")

    difference = difference_days(now, tomorrow)
    print(f"Tomorrow minus today is {difference}")

    now_mexico = now_in_time_zone('America/Mexico_City')
    print(f"Today is {now_mexico} in Mexico")
@@ -0,0 +1,29 @@
def read(file):
    for line in file:
        print(line, end="")  # each line already includes its newline


def write(file):
    file.write("\nHello, this is a file created with Python!")


def use_file(path, file_operation):
    try:
        with open(path, file_operation, encoding="utf-8") as file:

            match file_operation:
                case "r":
                    read(file)
                case "w":
                    write(file)
                case "a":
                    write(file)

    except FileNotFoundError:
        print("The file does not exist")


if __name__ == "__main__":
    use_file("resources/employee_data.csv", "r")
    # use_file("../resources/hello.txt", "w")
    # use_file("../resources/hello.txt", "a")
@@ -0,0 +1,6 @@
def print_hi(name):
    print(f'Hi, {name}')


if __name__ == '__main__':
    print_hi('Hello world!')
@@ -0,0 +1,26 @@
import pandas as pd
import uvicorn
from fastapi import FastAPI

app = FastAPI()


def get_employees():
    df = pd.read_csv("resources/employee_data.csv")
    head = df.head(10)
    data = head.to_dict(orient="records")
    return data


@app.get("/")
async def read_root():
    return {"message": "Hello, world!"}


@app.get("/employee")
async def read_employees():  # renamed; both handlers were called read_root
    return get_employees()


if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)
@@ -0,0 +1,39 @@
import matplotlib.pyplot as plt
import pandas as pd


def salary_vs_age(dfr, plot):
    plot.figure(figsize=(8, 6))
    plot.scatter(dfr['age'], dfr['salary'], color='blue')
    plot.title('Salary vs Age')
    plot.xlabel('Age')
    plot.ylabel('Salary')
    plot.grid(True)
    plot.show()


def salary_histogram(dfr, plot):
    dfr['salary'].hist()
    plot.show()


def avg_salary_by_department(dfr, plot):
    avg_salary_by_dept = dfr.groupby('department')['salary'].mean()

    plot.figure(figsize=(8, 6))
    avg_salary_by_dept.plot(kind='bar', color='green')

    plot.title('Mean salary by department')
    plot.xlabel('Department')
    plot.ylabel('Mean salary')

    plot.xticks(rotation=45)
    plot.show()


if __name__ == "__main__":
    df = pd.read_csv("../resources/employee_data.csv")

    salary_vs_age(df, plt)
    # salary_histogram(df, plt)
    # avg_salary_by_department(df, plt)
@@ -0,0 +1,6 @@
import numpy as np

if __name__ == "__main__":
    # random integer from 1 to 99 (the upper bound is exclusive)
    randint = np.random.randint(1, 100)

    print(f"The random number is {randint}")
@@ -0,0 +1,55 @@
import pandas as pd
from pandas import DataFrame  # fixed: was imported from an internal interchange module


def simple_data():
    dataset = {
        'cars': ["BMW", "Volvo", "Ford"],
        'passings': [3, 7, 2]
    }

    dfr = pd.DataFrame(dataset)

    print("Simple df")
    print(dfr)

    data_where = dfr.query("passings > 2")[['cars']]
    # data_where = dfr[dfr["passings"] > 2]
    # data_where = dfr.where(dfr["passings"] > 2, other="X")

    return data_where


def employee_max_salary(df: DataFrame):
    return df.loc[df["salary"].idxmax()]


def employee_greater_salary(df: DataFrame):
    # fixed: "salary > 10.000" would compare against 10.0, not ten thousand
    return df.query("salary > 10000")[["first_name", "last_name", "salary", "location"]]


def employee_full_name(df):
    return df["first_name"].str.cat(df["last_name"], sep=" ")


def employee_greater_salary_by_location(df: DataFrame):
    return df.groupby("location").apply(lambda group: group.loc[group['salary'].idxmax()])


def change_date_format(df: DataFrame):
    df['hire_date'] = pd.to_datetime(df['hire_date'])
    return df['hire_date'].dt.strftime('%d/%m/%Y')


if __name__ == "__main__":
    df = pd.read_csv("../resources/employee_data.csv")

    # data = simple_data()
    # higher_than_salary = employee_greater_salary(df)
    # max_salary = employee_max_salary(df)
    # df["full_name"] = employee_full_name(df)
    # top_employee_by_city = employee_greater_salary_by_location(df)
    # df['hire_date'] = change_date_format(df)

    # print(df.describe())
    print(df.head())
@@ -0,0 +1,15 @@
import requests

if __name__ == "__main__":

    url = "https://jsonplaceholder.typicode.com/posts"

    response = requests.get(url)

    if response.status_code == 200:
        print("Success!")
        data = response.json()
        for post in data[:5]:
            print(f"ID: {post['id']}, Title: {post['title']}")
    else:
        print(f"Error with the request: {response.status_code}")
@@ -0,0 +1,15 @@
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

if __name__ == "__main__":
    df = pd.read_csv("../resources/employee_data.csv")

    plt.figure(figsize=(8, 6))
    sns.boxplot(x='location', y='age', data=df)

    plt.title('Distribution of age by location')
    plt.xlabel('Location')
    plt.ylabel('Age')

    plt.show()
@@ -0,0 +1,13 @@
pytz~=2024.2
uvicorn
fastapi
numpy
pandas
matplotlib
seaborn
requests
bs4  # thin shim package; installing beautifulsoup4 alone would suffice
beautifulsoup4
torch
openai-whisper
pyttsx3~=2.98
File diff suppressed because it is too large
@@ -0,0 +1,65 @@
import os
import subprocess


def convert_to_hls_multires(input_file):
    # Output base folder = same folder as the input
    base_dir = os.path.dirname(input_file)

    renditions = [
        {"name": "1080p", "scale": "1920:1080", "bitrate": "3500k", "maxrate": "3850k", "bufsize": "5250k",
         "hls_time": 10},
        {"name": "720p", "scale": "1280:720", "bitrate": "2800k", "maxrate": "2996k", "bufsize": "4200k",
         "hls_time": 10},
        {"name": "480p", "scale": "854:480", "bitrate": "1400k", "maxrate": "1498k", "bufsize": "2100k", "hls_time": 4},
        {"name": "360p", "scale": "640:360", "bitrate": "800k", "maxrate": "856k", "bufsize": "1200k", "hls_time": 4},
    ]

    variant_playlist_lines = []

    for r in renditions:
        print(f"Processing {r['name']}...")

        out_path = os.path.join(base_dir, r["name"])
        os.makedirs(out_path, exist_ok=True)
        playlist_name = f"{r['name']}.m3u8"

        command = [
            "ffmpeg", "-y",
            "-hwaccel", "cuda",
            "-i", input_file,
            "-vf", f"hwupload_cuda,scale_cuda={r['scale']}:format=yuv420p",
            "-c:a", "aac", "-ar", "48000",
            "-c:v", "h264_nvenc",
            "-preset", "p4",
            "-b:v", r["bitrate"],
            "-maxrate", r["maxrate"],
            "-bufsize", r["bufsize"],
            "-g", "60",
            "-sc_threshold", "0",
            "-hls_time", str(r["hls_time"]),
            "-hls_playlist_type", "vod",
            "-hls_segment_filename", os.path.join(out_path, f"{r['name']}_%03d.ts"),
            os.path.join(out_path, playlist_name)
        ]

        subprocess.run(command, check=True)

        # The HLS RESOLUTION attribute expects WxH, while the ffmpeg scale filter uses W:H
        resolution = r["scale"].replace(":", "x")
        bandwidth = r["bitrate"].replace("k", "000")
        variant_playlist_lines.append(
            f'#EXT-X-STREAM-INF:BANDWIDTH={bandwidth},RESOLUTION={resolution}\n{r["name"]}/{playlist_name}'
        )

    master_playlist_path = os.path.join(base_dir, "master.m3u8")
    with open(master_playlist_path, "w") as f:
        f.write("#EXTM3U\n")
        for line in variant_playlist_lines:
            f.write(line + "\n")

    print(f"\n✅ HLS generated successfully. Master playlist: {master_playlist_path}")


if __name__ == "__main__":
    input_file = r""
    convert_to_hls_multires(input_file)
@@ -0,0 +1,141 @@
import os
import re
import smtplib
from collections import Counter
from email.utils import formatdate

import requests
from bs4 import BeautifulSoup


def fetch_latest_news(article_position):
    url = "https://www.genbeta.com/categoria/inteligencia-artificial"

    response = requests.get(url)

    if response.status_code != 200:
        raise Exception(f"Error fetching the URL. Code: {response.status_code}")

    page = BeautifulSoup(response.text, "html.parser", from_encoding="utf-8")

    div_recent_list = page.find("div", class_="section-recent-list")

    if not div_recent_list:
        raise Exception("Could not find any recent list")

    article = div_recent_list.find_all("article")[article_position]

    first_news = article.find("a", href=True)

    if not first_news:
        raise Exception("Could not find any link.")

    news_title = first_news.get_text(strip=True)

    news_link = first_news["href"]

    if not news_link.startswith("http"):
        news_link = "https://www.genbeta.com" + news_link

    return news_title, news_link


def fetch_article(url):
    response = requests.get(url)

    if response.status_code != 200:
        raise Exception(f"Error fetching the article. Code: {response.status_code}")

    page = BeautifulSoup(response.text, "html.parser", from_encoding="utf-8")

    article_body = page.find("div", {"class": "article-content"})

    if not article_body:
        raise Exception("Could not find content for this article.")

    paragraphs = article_body.find_all("p")

    content = " ".join([p.get_text(strip=True) for p in paragraphs])

    return content


def summarize_article(text, sentences_count=3):
    sentences = re.split(r"(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s", text)

    sentences = [sentence for sentence in sentences if len(sentence.split()) > 5]

    words = re.findall(r"\w+", text.lower())
    word_frequencies = Counter(words)

    # Score each sentence by the summed frequency of its words
    sentence_scores = {
        sentence: sum(word_frequencies.get(word.lower(), 0) for word in sentence.split())
        for sentence in sentences
    }

    summarized_sentences = sorted(
        sentence_scores.keys(),
        key=lambda sentence: sentence_scores[sentence],
        reverse=True
    )[:sentences_count]

    # Restore the original order of the selected sentences
    ordered_summary = sorted(
        summarized_sentences,
        key=lambda sentence: sentences.index(sentence)
    )

    return " ".join(ordered_summary)


def send_email_with_summaries(message_body):
    email_from = os.getenv("EMAIL_FROM")
    email_to = os.getenv("EMAIL_TO")
    password = os.getenv("EMAIL_PASSWORD")

    subject = f"News summary: {formatdate(localtime=True)}"

    message = f"Subject: {subject}\n\n{message_body}".encode("utf-8")

    with smtplib.SMTP("smtp.gmail.com", 587) as server:
        server.starttls()
        server.login(email_from, password)
        result = server.sendmail(email_from, email_to, message)

    # sendmail returns a dict of refused recipients; non-empty means failure
    if result:
        raise Exception("The email failed to send")


def main():
    try:
        summaries = ""
        for i in range(0, 3):

            news_title, news_link = fetch_latest_news(i)
            summaries += f"News found: {news_title} ({news_link})\n"
            print(f"News found: {news_title} ({news_link})")

            content = fetch_article(news_link)

            if content:
                summary = summarize_article(content)
                summaries += "\n=== Summary ===\n"
                print("\n=== Summary ===\n")
                lines = summary.split(". ")
                for line in lines:
                    summaries += f"- {line.strip()}.\n"
                    print(f"- {line.strip()}.")

                summaries += "\n" + "=" * 20 + "\n"
                print("\n" + "=" * 20 + "\n")
            else:
                print("Could not retrieve content for this news item.")

        # send_email_with_summaries(summaries)
        print("Success!")
    except Exception as e:
        print(f"Error: {e}")


if __name__ == "__main__":
    main()
@@ -0,0 +1,47 @@
import os.path

import torch
import whisper


def cuda_works():
    print(torch.cuda.is_available())
    if torch.cuda.is_available():
        print(torch.cuda.get_device_name(0))


def format_time_vtt(seconds):
    ms = int((seconds - int(seconds)) * 1000)
    s = int(seconds) % 60
    m = (int(seconds) // 60) % 60
    h = int(seconds) // 3600
    return f"{h:02}:{m:02}:{s:02}.{ms:03}"  # VTT uses a period before the milliseconds


def generate_subtitles_vtt(input_path, output_path):
    model_name = "medium"
    model = whisper.load_model(model_name)

    result = model.transcribe(input_path, language="es")

    master_output = os.path.join(output_path, "subtitles.vtt")

    with open(master_output, "w", encoding="utf-8") as vtt_file:
        vtt_file.write("WEBVTT\n\n")  # mandatory VTT header
        for segment in result["segments"]:
            start = segment["start"]
            end = segment["end"]
            text = segment["text"]

            vtt_file.write(f"{format_time_vtt(start)} --> {format_time_vtt(end)}\n")
            vtt_file.write(f"{text}\n\n")

    print(f"VTT subtitles generated at: {master_output}")


if __name__ == "__main__":
    input_video = r""
    output_vtt = r""

    # cuda_works()
    generate_subtitles_vtt(input_video, output_vtt)
@@ -0,0 +1,29 @@
import os


def srt_to_vtt(input_srt):
    base_dir = os.path.dirname(input_srt)

    output_vtt = os.path.join(base_dir, "subtitles.vtt")

    with open(input_srt, "r", encoding="utf-8") as srt_file:
        lines = srt_file.readlines()

    with open(output_vtt, "w", encoding="utf-8") as vtt_file:
        vtt_file.write("WEBVTT\n\n")  # mandatory VTT header

        for line in lines:
            # Convert the timestamp line (SRT uses a comma, VTT a period)
            if "-->" in line:
                line = line.replace(",", ".")
            # Skip SRT cue indices (bare line numbers)
            if line.strip().isdigit():
                continue
            vtt_file.write(line)

    print(f"✅ VTT file generated at: {output_vtt}")


if __name__ == "__main__":
    input_srt = r""
    srt_to_vtt(input_srt)
@@ -0,0 +1,41 @@
import torch
import whisper


def cuda_works():
    print(torch.cuda.is_available())
    print(torch.cuda.get_device_name(0))


def format_time(seconds):
    ms = int((seconds - int(seconds)) * 1000)
    s = int(seconds) % 60
    m = (int(seconds) // 60) % 60
    h = int(seconds) // 3600
    return f"{h:02}:{m:02}:{s:02},{ms:03}"  # SRT uses a comma before the milliseconds


def generate_subtitles(video_path):
    model_name = "medium"
    model = whisper.load_model(model_name)

    # task="translate" produces English subtitles from the Spanish audio
    result = model.transcribe(video_path, task="translate", language="es")

    srt_path = "subtitles.srt"
    with open(srt_path, "w", encoding="utf-8") as srt_file:
        for segment in result["segments"]:
            start = segment["start"]
            end = segment["end"]
            text = segment["text"]

            srt_file.write(f"{segment['id'] + 1}\n")
            srt_file.write(f"{format_time(start)} --> {format_time(end)}\n")
            srt_file.write(f"{text}\n\n")

    print(f"Subtitles generated at: {srt_path}")


if __name__ == "__main__":
    # cuda_works()
    generate_subtitles(
        r"C:\Users\david\Videos\Social media\Cursos\Spring Security\Introduccion\Introducción Curso Spring Security Cero a Experto.mp4")
@@ -0,0 +1,58 @@
import re

import pyttsx3


def show_voices_available():
    engine = pyttsx3.init()
    voices = engine.getProperty('voices')
    for i, voice in enumerate(voices):
        print(f"Voice {i}: {voice.name}, {voice.id}, {voice.languages}")


def read_srt_file_in_blocks(subtitles, block_size=500):
    subtitles_split = subtitles.split(". ")
    for i in range(0, len(subtitles_split), block_size):
        # Rejoin with ". " so sentence boundaries survive the split
        yield ". ".join(subtitles_split[i:i + block_size])


def process_in_blocks(engine, subtitles):
    for i, text_block in enumerate(read_srt_file_in_blocks(subtitles)):
        audio_file = f"audio_part_{i + 1}.mp3"
        engine.save_to_file(text_block, audio_file)
        print(f"Generating: {audio_file}")
        engine.runAndWait()


def process_all(engine, subtitles):
    engine.save_to_file(subtitles, "audio.mp3")
    engine.runAndWait()


def generate_voice(text_path):
    engine = pyttsx3.init()

    rate = engine.getProperty('rate')
    engine.setProperty('rate', rate - 50)

    engine.setProperty('volume', 0.9)

    voices = engine.getProperty('voices')
    engine.setProperty('voice', voices[1].id)  # second installed voice; index varies per system

    with open(text_path, "r", encoding="utf-8") as file:
        content = file.read()

    # Extract only the spoken text from the SRT cues
    text = re.findall(
        r"(?<=\n\n)(?:\d+\n)?\d{2}:\d{2}:\d{2},\d{3} --> \d{2}:\d{2}:\d{2},\d{3}\n(.+?)(?=\n\n|\Z)", content,
        re.DOTALL)

    subtitles = " ".join(text)

    # process_in_blocks(engine, subtitles)
    process_all(engine, subtitles)
    print("Audio generated successfully!")


if __name__ == "__main__":
    generate_voice("subtitles.srt")
@@ -0,0 +1,32 @@
import pandas as pd

if __name__ == "__main__":

    date_from = "2025-01-06"
    date_to = "21-17-28"

    menu = """
    Select one of the actions:
    1. Get report from reviews
    2. Get student list
    3. Exit
    """

    file_path = ""

    # Read the choice inside the loop; the original read it only once,
    # which made the loop run forever on the same action
    while True:
        print(menu)
        action = int(input())

        if action == 3:
            break

        match action:
            case 1:
                file_path = f"../resources/Udemy_Reviews_Export_{date_from}_{date_to}.csv"
            case 2:
                file_path = f"../resources/Students_List_Export_{date_from}_{date_to}.csv"
            case _:
                print("Data does not match any specific condition")
                continue

        df = pd.read_csv(file_path)

        print(df.describe())