playground

This commit is contained in:
javiermengual 2025-11-17 15:18:32 +01:00
parent 6850b8d91e
commit e822a36255
39 changed files with 10818 additions and 0 deletions

13
README.md Normal file
View File

@ -0,0 +1,13 @@
- **Ejercicios básicos**: Incluye problemas de sintaxis, estructuras de control, manipulación de colecciones, y
funciones.
- **Exploración de librerías**: Ejemplos y ejercicios con librerías populares como:
- **Pandas**: Manipulación y análisis de datos.
- **Matplotlib**: Visualización de datos.
- **FastAPI**: Creación de APIs rápidas y eficientes.
- **Functools**: Trabajo con funciones de orden superior como `reduce`, junto con las funciones integradas `map` y `filter`.
---

11
basics/classes.py Normal file
View File

@ -0,0 +1,11 @@
class Person:
    """A minimal person record with a name and an age."""

    def __init__(self, name, age):
        # Plain attribute assignment; no validation is performed.
        self.name = name
        self.age = age
if __name__ == '__main__':
    # Demo: build a Person and greet using its name attribute.
    person = Person("David", 25)
    print(f"My name is {person.name}")

12
basics/conditions.py Normal file
View File

@ -0,0 +1,12 @@
if __name__ == '__main__':
    # Compare a classic if/else with the conditional expression form.
    x, y = 5, 2
    print("Even" if x % 2 == 0 else "Odd")
    parity_of_y = "Even" if y % 2 == 0 else "Odd"
    print(parity_of_y)

112
basics/data_structures.py Normal file
View File

@ -0,0 +1,112 @@
import queue
from array import array
from collections import deque, Counter
# Collections ordered with mutable data, can contain different types of data
def lists():
    """Demonstrate list mutation, iteration, slicing and reversal."""
    items = [1, 2, 3, 4, 5, "David"]
    items.remove(5)
    print(f"List with element removed: {items}")
    items.append(5)
    for item in items:
        print(f"Number {item}")
    print(f"List sliced: {items[1:3]}")
    print(f"List reversed: {items[::-1]}")
# Collections ordered with immutable data, can contain different types of data
def tuples():
    """Demonstrate tuple creation and positional access."""
    record = (24, "David", 20, "Alberto")
    print(f"Tuples {record}")
    print(f"First element: {record[0]}")
# Collections unordered with immutable data, can contain different types of data
def sets():
    """Demonstrate set algebra: intersection, union and difference."""
    left = {1, 2, 3, 4, 5, 6}
    right = {1, 2, 3, 4, 5}
    print(f"Set of data: {left}")
    print(f"Intersection {left.intersection(right)}")
    print(f"Union {left.union(right)}")
    print(f"Difference {left.difference(right)}")
# Collections of key-value
def dictionaries():
    """Demonstrate dict insertion, deletion and key lookup."""
    data = {"David": 25, "Alberto": 20}
    data["John"] = 80
    data.pop("Alberto")
    print(f"Data {data}")
    # Use single quotes for the key inside the f-string: reusing the outer
    # double quote is only legal on Python 3.12+ (PEP 701), so the original
    # line was a SyntaxError on 3.11 and older.
    print(f"Data for David {data['David']}")
def strings():
    """Demonstrate that str.replace returns a new string."""
    greeting = "Hello world!"
    print(f"Data {greeting}")
    print(f"Data replaced {greeting.replace('world', 'David')}")
def collections():
    """Demonstrate deque left-pops and Counter frequency counting."""
    dq = deque([1, 2, 3])
    dq.popleft()
    fruit_counts = Counter(['apple', 'banana', 'apple'])
    print(f"Data queue {dq}")
    print(f"Data counter items {fruit_counts}")
    print(f"Data counter most common {fruit_counts.most_common()}")
def arrays():
    """Demonstrate the typed, memory-compact array.array container."""
    numbers = array('i', [1, 2, 3, 4])
    print(f"Data {numbers}")
    print(f"First data {numbers[0]}")
def queues():
    """Demonstrate FIFO ordering of queue.Queue."""
    fifo = queue.Queue()
    for value in (1, 2):
        fifo.put(value)
    print(f"First Data {fifo.get()}")
def manipulate_list():
    """Upper-case the names, keep those starting with 'D', print the result."""
    names = ["david", "alberto", "john"]
    kept = []
    for name in names:
        upper_name = name.upper()
        if upper_name.startswith("D"):
            kept.append(upper_name)
    print(kept)
def manipulate_list2():
    """Upper-case/filter pipeline expressed as a single comprehension."""
    names = ["david", "alberto", "john"]
    print([n.upper() for n in names if n.upper().startswith("D")])
if __name__ == '__main__':
    # Entry point: run one of the demos defined above.
    # lists()
    manipulate_list()

52
basics/dates.py Normal file
View File

@ -0,0 +1,52 @@
from datetime import datetime, timedelta
import pytz
def iso_format(date_string: str) -> datetime:
    """Parse an ISO-8601 date string (e.g. '2024-12-12') into a datetime."""
    return datetime.fromisoformat(date_string)
def str_format(date_string: str, date_format: str) -> datetime:
    """Parse *date_string* according to the strptime pattern *date_format*."""
    return datetime.strptime(date_string, date_format)
def apply_format(date_time: datetime, date_format: str) -> str:
    """Render *date_time* as text using the strftime pattern *date_format*."""
    return date_time.strftime(date_format)
def add_days(date_time, days_to_add):
    """Return *date_time* shifted forward by *days_to_add* days."""
    return date_time + timedelta(days=days_to_add)
def difference_days(date_time_one, date_time_two):
    """Return the timedelta *date_time_two* - *date_time_one*."""
    return date_time_two - date_time_one
def now_in_time_zone(time_zone):
    """Return the current time localized to the IANA zone *time_zone*.

    Uses the stdlib ``zoneinfo`` module (Python 3.9+) instead of the
    third-party ``pytz`` package; for ``datetime.now(tz)`` both produce
    the same aware datetime, so callers are unaffected.
    """
    from zoneinfo import ZoneInfo  # stdlib replacement for pytz
    return datetime.now(ZoneInfo(time_zone))
if __name__ == "__main__":
    date = datetime(2025, 1, 5)
    print(f"The date created is {date}")
    # Renamed: the original assigned the result to `iso_format`, shadowing
    # the iso_format() function defined above.
    iso_date = iso_format("2024-12-12")
    print(f"The date with iso format is {iso_date}")
    now = datetime.now()
    print(f"Today is {now}")
    string_format = str_format("12/12/2024", "%d/%m/%Y")
    formated = now.strftime("%d/%m/%Y %H:%M:%S")
    print(f"The date formated is {formated}")
    tomorrow = add_days(now, 1)
    # Message fixed: only one day is added, but the text claimed "+ 5 days".
    print(f"Today + 1 day is {tomorrow}")
    difference = difference_days(now, tomorrow)
    print(f"Tomorrow minus today is {difference}")
    now_mexico = now_in_time_zone('America/Mexico_City')
    print(f"Today is {now_mexico} in Mexico")

29
basics/files.py Normal file
View File

@ -0,0 +1,29 @@
def read(file):
    """Print every line of an already-open text file."""
    for current_line in file:
        print(current_line)
def write(file):
    """Append a fixed greeting line to an already-open writable file."""
    file.write("\nHello, this is a file created with Python!")
def use_file(path, file_operation):
    """Open *path* in mode *file_operation* and read or write it.

    "r" prints the file's lines; "w" and "a" write a greeting line.
    A missing file is reported rather than raising.
    """
    try:
        with open(path, file_operation, encoding="utf-8") as file:
            if file_operation == "r":
                read(file)
            elif file_operation in ("w", "a"):
                write(file)
    except FileNotFoundError:
        print("The file not exist")
if __name__ == "__main__":
    # Read the bundled CSV; write/append demos kept for reference.
    use_file("resources/employee_data.csv", "r")
    # use_file("../resources/hello.txt", "w")
    # use_file("../resources/hello.txt", "a")

6
basics/hello.py Normal file
View File

@ -0,0 +1,6 @@
def print_hi(name):
    """Print a short greeting for *name*."""
    print(f'Hi, {name}')
if __name__ == '__main__':
    # Greets using the literal text "Hello world!" as the name.
    print_hi('Hello world!')

26
libraries/api.py Normal file
View File

@ -0,0 +1,26 @@
import pandas as pd
import uvicorn
from fastapi import FastAPI
# Application instance shared by the route decorators below.
app = FastAPI()


def get_employees():
    """Load the employee CSV and return its first 10 rows as dicts."""
    frame = pd.read_csv("resources/employee_data.csv")
    return frame.head(10).to_dict(orient="records")
@app.get("/")
async def read_root():
    """Root endpoint: simple liveness greeting."""
    return {"message": "Hello, world!"}


@app.get("/employee")
async def read_employees():
    """Employee endpoint: first 10 CSV rows as JSON records.

    Renamed from a second ``read_root`` definition; both routes worked
    (registration happens at decoration time) but the duplicate name
    silently rebound the module-level symbol. Routes are addressed by
    path, so the rename is invisible to HTTP clients.
    """
    return get_employees()


if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)

39
libraries/matplot.py Normal file
View File

@ -0,0 +1,39 @@
import matplotlib.pyplot as plt
import pandas as pd
def salary_vs_age(dfr, plot):
    """Draw a blue scatter plot of salary (y) against age (x).

    *plot* is the pyplot module (or any object with the same interface).
    """
    plot.figure(figsize=(8, 6))
    plot.scatter(dfr['age'], dfr['salary'], color='blue')
    plot.title('Salary vs Age')
    plot.xlabel('Age')
    plot.ylabel('Salary')
    plot.grid(True)
    plot.show()
def salary_histogram(dfr, plot):
    """Show a histogram of the salary column (pandas draws, *plot* shows)."""
    dfr['salary'].hist()
    plot.show()
def avg_salary_by_department(dfr, plot):
    """Bar-chart the mean salary per department."""
    mean_by_dept = dfr.groupby('department')['salary'].mean()
    plot.figure(figsize=(8, 6))
    mean_by_dept.plot(kind='bar', color='green')
    plot.title('Salary mean by department')
    plot.xlabel('Department')
    plot.ylabel('Salary mean')
    plot.xticks(rotation=45)
    plot.show()
if __name__ == "__main__":
    # Load the employee dataset and render one of the charts above.
    df = pd.read_csv("../resources/employee_data.csv")
    salary_vs_age(df, plt)
    # salary_histogram(df, plt)
    # avg_salary_by_department(df, plt)

6
libraries/nump.py Normal file
View File

@ -0,0 +1,6 @@
import numpy as np
if __name__ == "__main__":
    # Draw one pseudo-random integer in [1, 100) and report it.
    random_value = np.random.randint(1, 100)
    print(f"The random number is {random_value}")

55
libraries/panda.py Normal file
View File

@ -0,0 +1,55 @@
import pandas as pd
from pandas.core.interchange.dataframe_protocol import DataFrame
def simple_data():
    """Build a small cars DataFrame and return the cars with passings > 2."""
    dataset = {
        'cars': ["BMW", "Volvo", "Ford"],
        'passings': [3, 7, 2]
    }
    frame = pd.DataFrame(dataset)
    print("Simple df")
    print(frame)
    # Boolean-mask alternatives to .query(), kept for reference:
    # frame[frame["passings"] > 2]
    # frame.where(frame["passings"] > 2, other="X")
    return frame.query("passings > 2")[['cars']]
def employee_max_salary(df: pd.DataFrame):
    """Return the row (Series) of the employee with the highest salary.

    Annotated with ``pd.DataFrame`` instead of the pandas-internal
    interchange-protocol class the file previously imported, which is not
    the public DataFrame type.
    """
    return df.loc[df["salary"].idxmax()]
def employee_greater_salary(df: pd.DataFrame):
    """Return name/salary/location columns for employees earning over 10000.

    The threshold was written as the literal ``10.000`` — i.e. the float
    10.0, which matches virtually every salary; the intended European-style
    ten-thousand is spelled ``10000`` here. Annotation switched to the
    public ``pd.DataFrame`` type.
    """
    return df.query("salary > 10000")[["first_name", "last_name", "salary", "location"]]
def employee_full_name(df):
    """Return a Series with '<first_name> <last_name>' per row."""
    first, last = df["first_name"], df["last_name"]
    return first.str.cat(last, sep=" ")
def employee_greater_salary_by_location(df: pd.DataFrame):
    """Return, per location, the full row of the top-salary employee.

    Annotation switched to the public ``pd.DataFrame`` type instead of the
    pandas-internal interchange-protocol class.
    """
    return df.groupby("location").apply(lambda group: group.loc[group['salary'].idxmax()])
def change_date_format(df: pd.DataFrame):
    """Return hire_date rendered as dd/mm/YYYY strings.

    NOTE: mutates the caller's frame — hire_date is converted to
    datetime64 in place before formatting. Annotation switched to the
    public ``pd.DataFrame`` type.
    """
    df['hire_date'] = pd.to_datetime(df['hire_date'])
    return df['hire_date'].dt.strftime('%d/%m/%Y')
if __name__ == "__main__":
    df = pd.read_csv("../resources/employee_data.csv")
    # Each commented line exercises one helper defined above.
    # data = simple_data()
    # higher_than_salary = employee_greater_salary(df)
    # max_salary = employee_max_salary(df)
    # df["full_name"] = employee_full_name(df)
    # top_employee_by_city = employee_greater_salary_by_location(df)
    # df['hire_date'] = change_date_format(df)
    # print(df.describe())
    print(df.head())

0
libraries/pil.py Normal file
View File

15
libraries/request.py Normal file
View File

@ -0,0 +1,15 @@
import requests
if __name__ == "__main__":
    # Fetch demo posts and print the first five, or report the HTTP error.
    url = "https://jsonplaceholder.typicode.com/posts"
    response = requests.get(url)
    if response.status_code != 200:
        print(f"Error with the request: {response.status_code}")
    else:
        print("Success!")
        for post in response.json()[:5]:
            print(f"ID: {post['id']}, Title: {post['title']}")

15
libraries/seabor.py Normal file
View File

@ -0,0 +1,15 @@
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
if __name__ == "__main__":
    # Box-plot the age distribution per office location.
    df = pd.read_csv("../resources/employee_data.csv")
    plt.figure(figsize=(8, 6))
    sns.boxplot(x='location', y='age', data=df)
    plt.title('Distribution of age by location')
    plt.xlabel('Location')
    plt.ylabel('Age')
    plt.show()

0
libraries/subproces.py Normal file
View File

13
requirements.txt Normal file
View File

@ -0,0 +1,13 @@
pytz~=2024.2
uvicorn
fastapi
numpy
pandas
matplotlib
seaborn
requests
bs4
beautifulsoup4
torch
openai-whisper
pyttsx3~=2.98

10001
resources/employee_data.csv Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,65 @@
import os
import subprocess
def convert_to_hls_multires(input_file):
    """Transcode *input_file* into a multi-rendition HLS package.

    For each rendition (1080p..360p) an ffmpeg run (NVENC encode, CUDA
    scaling) writes .ts segments and a per-rendition playlist into a
    sibling folder named after the rendition; finally a master.m3u8
    variant playlist is written next to the input file.

    Requires an ffmpeg binary with CUDA/NVENC support on PATH; raises
    subprocess.CalledProcessError if any ffmpeg invocation fails.
    """
    # Base folder = the input file's own folder.
    base_dir = os.path.dirname(input_file)
    # Bitrate ladder; maxrate/bufsize follow the usual ~1.07x / 1.5x rule.
    renditions = [
        {"name": "1080p", "scale": "1920:1080", "bitrate": "3500k", "maxrate": "3850k", "bufsize": "5250k",
         "hls_time": 10},
        {"name": "720p", "scale": "1280:720", "bitrate": "2800k", "maxrate": "2996k", "bufsize": "4200k",
         "hls_time": 10},
        {"name": "480p", "scale": "854:480", "bitrate": "1400k", "maxrate": "1498k", "bufsize": "2100k", "hls_time": 4},
        {"name": "360p", "scale": "640:360", "bitrate": "800k", "maxrate": "856k", "bufsize": "1200k", "hls_time": 4},
    ]
    variant_playlist_lines = []
    for r in renditions:
        print(f"Procesando {r['name']}...")
        out_path = os.path.join(base_dir, r["name"])
        os.makedirs(out_path, exist_ok=True)
        playlist_name = f"{r['name']}.m3u8"
        command = [
            "ffmpeg", "-y",
            "-hwaccel", "cuda",
            "-i", input_file,
            # Upload frames to the GPU and scale there; force 8-bit 4:2:0.
            "-vf", f"hwupload_cuda,scale_cuda={r['scale']}:format=yuv420p",
            "-c:a", "aac", "-ar", "48000",
            "-c:v", "h264_nvenc",
            "-preset", "p4",
            "-b:v", r["bitrate"],
            "-maxrate", r["maxrate"],
            "-bufsize", r["bufsize"],
            # Fixed GOP, no scene-cut keyframes: keeps segments aligned.
            "-g", "60",
            "-sc_threshold", "0",
            "-hls_time", str(r["hls_time"]),
            "-hls_playlist_type", "vod",
            "-hls_segment_filename", os.path.join(out_path, f"{r['name']}_%03d.ts"),
            os.path.join(out_path, playlist_name)
        ]
        subprocess.run(command, check=True)
        resolution = r["scale"]
        # "3500k" -> "3500000" for the EXT-X-STREAM-INF BANDWIDTH attribute.
        bandwidth = r["bitrate"].replace("k", "000")
        variant_playlist_lines.append(
            f'#EXT-X-STREAM-INF:BANDWIDTH={bandwidth},RESOLUTION={resolution}\n{r["name"]}/{playlist_name}'
        )
    master_playlist_path = os.path.join(base_dir, "master.m3u8")
    with open(master_playlist_path, "w") as f:
        f.write("#EXTM3U\n")
        for line in variant_playlist_lines:
            f.write(line + "\n")
    print(f"\n✅ HLS generado con éxito. Playlist maestro: {master_playlist_path}")
if __name__ == "__main__":
    # TODO: set the absolute path of the source video before running.
    input_file = r""
    convert_to_hls_multires(input_file)

141
utilities/news_summarize.py Normal file
View File

@ -0,0 +1,141 @@
import os
import re
import smtplib
from collections import Counter
from email.utils import formatdate
import requests
from bs4 import BeautifulSoup
def fetch_latest_news(article_position):
    """Return (title, absolute URL) for the article at *article_position*
    on Genbeta's artificial-intelligence category page.

    Raises when the page cannot be fetched or its markup lacks the
    expected recent-list structure.
    """
    url = "https://www.genbeta.com/categoria/inteligencia-artificial"
    resp = requests.get(url)
    if resp.status_code != 200:
        raise Exception(f"Error fetching the URL. Code: {resp.status_code}")
    soup = BeautifulSoup(resp.text, "html.parser", from_encoding="utf-8")
    recent_list = soup.find("div", class_="section-recent-list")
    if not recent_list:
        raise Exception("Doesn't find any recent list")
    article = recent_list.find_all("article")[article_position]
    link_tag = article.find("a", href=True)
    if not link_tag:
        raise Exception("Doesn't find any link.")
    title = link_tag.get_text(strip=True)
    link = link_tag["href"]
    if not link.startswith("http"):
        link = "https://www.genbeta.com" + link
    return title, link
def fetch_article(url):
    """Download *url* and return its article body as one plain-text string."""
    resp = requests.get(url)
    if resp.status_code != 200:
        raise Exception(f"Error fetching the article. Code: {resp.status_code}")
    soup = BeautifulSoup(resp.text, "html.parser", from_encoding="utf-8")
    body = soup.find("div", {"class": "article-content"})
    if not body:
        raise Exception("Is not possible to find content for this artícle.")
    return " ".join(p.get_text(strip=True) for p in body.find_all("p"))
def summarize_article(text, sentences_count=3):
sentences = re.split(r"(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s", text)
sentences = [sentence for sentence in sentences if len(sentence.split()) > 5]
words = re.findall(r"\w+", text.lower())
word_frequencies = Counter(words)
sentence_scores = {
sentence: sum(word_frequencies.get(word.lower(), 0) for word in sentence.split())
for sentence in sentences
}
summarized_sentences = sorted(
sentence_scores.keys(),
key=lambda sentence: sentence_scores[sentence],
reverse=True
)[:sentences_count]
ordered_summary = sorted(
summarized_sentences,
key=lambda sentence: sentences.index(sentence)
)
return " ".join(ordered_summary)
def send_email_with_summaries(message_body):
    """Email *message_body* via Gmail SMTP.

    Credentials come from the EMAIL_FROM / EMAIL_TO / EMAIL_PASSWORD
    environment variables. Raises when sendmail reports refused
    recipients.
    """
    email_from = os.getenv("EMAIL_FROM")
    email_to = os.getenv("EMAIL_TO")
    password = os.getenv("EMAIL_PASSWORD")
    subject = f"News summary: {formatdate(localtime=True)}"
    message = f"Subject: {subject}\n\n{message_body}".encode("utf-8")
    with smtplib.SMTP("smtp.gmail.com", 587) as server:
        server.starttls()
        server.login(email_from, password)
        # sendmail returns a dict of refused recipients (empty on success).
        result = server.sendmail(email_from, email_to, message)
        # No explicit quit() here: the SMTP context manager already sends
        # QUIT on exit, so the original extra quit() was redundant and
        # left the later QUIT talking to a closed connection.
    if result:
        raise Exception("The email fail to send it")
def main():
    """Fetch the three newest AI articles, print (and accumulate) a short
    extractive summary of each; emailing the digest is currently disabled.

    Any failure in fetching/summarizing is caught and reported rather
    than propagated.
    """
    try:
        summaries = ""
        for i in range(0, 3):
            news_title, news_link = fetch_latest_news(i)
            summaries += f"New find: {news_title} ({news_link})"
            print(f"New find: {news_title} ({news_link})")
            content = fetch_article(news_link)
            if content:
                summary = summarize_article(content)
                summaries += "\n=== Resume ===\n"
                print("\n=== Resume ===\n")
                # Re-split the summary so each sentence becomes a bullet.
                lines = summary.split(". ")
                for line in lines:
                    summaries += f"- {line.strip()}.\n"
                    print(f"- {line.strip()}.")
                summaries += "\n" + "=" * 20 + "\n"
                print("\n" + "=" * 20 + "\n")
            else:
                print("Is not possible to retrieve content for this new.")
        # Emailing the digest is disabled for now.
        # send_email_with_summaries(summaries)
        print("Success!")
    except Exception as e:
        print(f"Error: {e}")
if __name__ == "__main__":
    # Script entry point.
    main()

View File

@ -0,0 +1,47 @@
import os.path
import torch
import whisper
def cuda_works():
    """Print whether CUDA is usable and, if so, the first GPU's name."""
    available = torch.cuda.is_available()
    print(available)
    if available:
        print(torch.cuda.get_device_name(0))
def format_time_vtt(seconds):
    """Convert float seconds to the WebVTT timestamp HH:MM:SS.mmm."""
    whole = int(seconds)
    ms = int((seconds - whole) * 1000)
    minutes, s = divmod(whole, 60)
    h, m = divmod(minutes, 60)
    return f"{h:02}:{m:02}:{s:02}.{ms:03}"  # VTT uses a dot before the milliseconds
def generate_subtitles_vtt(input_path, output_path):
    """Transcribe *input_path* (Spanish audio) with Whisper and write
    ``output_path/subtitles.vtt`` in WebVTT format.

    Loads the "medium" Whisper model on every call; requires the
    openai-whisper package (torch-backed).
    """
    model_name = "medium"
    model = whisper.load_model(model_name)
    result = model.transcribe(input_path, language="es")
    master_output = os.path.join(output_path, "subtitles.vtt")
    with open(master_output, "w", encoding="utf-8") as vtt_file:
        vtt_file.write("WEBVTT\n\n")  # mandatory VTT header
        for segment in result["segments"]:
            start = segment["start"]
            end = segment["end"]
            text = segment["text"]
            vtt_file.write(f"{format_time_vtt(start)} --> {format_time_vtt(end)}\n")
            vtt_file.write(f"{text}\n\n")
    print(f"Subtítulo VTT generado en: {master_output}")
if __name__ == "__main__":
    # TODO: fill in the source video path and the output folder.
    input_video = r""
    output_vtt = r""
    # cuda_works()
    generate_subtitles_vtt(input_video, output_vtt)

View File

@ -0,0 +1,29 @@
import os
def srt_to_vtt(input_srt):
    """Convert an SRT subtitle file to subtitles.vtt in the same folder.

    Timing commas become dots and bare cue numbers are dropped — all a
    simple SRT needs to become valid WebVTT.
    """
    base_dir = os.path.dirname(input_srt)
    output_vtt = os.path.join(base_dir, "subtitles.vtt")
    with open(input_srt, "r", encoding="utf-8") as srt_file:
        lines = srt_file.readlines()
    with open(output_vtt, "w", encoding="utf-8") as vtt_file:
        vtt_file.write("WEBVTT\n\n")  # mandatory VTT header
        for line in lines:
            # Skip SRT cue indices (lines that are only a number).
            if line.strip().isdigit():
                continue
            # SRT uses ',' before milliseconds; VTT uses '.'.
            if "-->" in line:
                line = line.replace(",", ".")
            vtt_file.write(line)
    print(f"✅ Archivo VTT generado en: {output_vtt}")
if __name__ == "__main__":
    # TODO: point this at the .srt file to convert.
    input_srt = r""
    srt_to_vtt(input_srt)

View File

@ -0,0 +1,41 @@
import torch
import whisper
def cuda_works():
    """Print CUDA availability and, only when available, the GPU name.

    ``get_device_name(0)`` raises when no CUDA device is present, so it
    is now guarded by the availability check instead of running
    unconditionally (the sibling VTT script already guards it this way).
    """
    available = torch.cuda.is_available()
    print(available)
    if available:
        print(torch.cuda.get_device_name(0))
def format_time(seconds):
    """Convert float seconds to the SRT timestamp HH:MM:SS,mmm."""
    whole = int(seconds)
    ms = int((seconds - whole) * 1000)
    minutes, s = divmod(whole, 60)
    h, m = divmod(minutes, 60)
    return f"{h:02}:{m:02}:{s:02},{ms:03}"
def generate_subtitles(video_path):
    """Transcribe *video_path* with Whisper and write subtitles.srt in
    the current working directory.

    ``task="translate"`` makes Whisper emit English text from the
    Spanish (``language="es"``) audio. Loads the "medium" model on
    every call.
    """
    model_name = "medium"
    model = whisper.load_model(model_name)
    result = model.transcribe(video_path, task="translate", language="es")
    srt_path = "subtitles.srt"
    with open(srt_path, "w", encoding="utf-8") as srt_file:
        for segment in result["segments"]:
            start = segment["start"]
            end = segment["end"]
            text = segment["text"]
            # SRT cues are 1-based; Whisper segment ids start at 0.
            srt_file.write(f"{segment['id'] + 1}\n")
            srt_file.write(f"{format_time(start)} --> {format_time(end)}\n")
            srt_file.write(f"{text}\n\n")
    print(f"Subtítle generated in path: {srt_path}")
if __name__ == "__main__":
    # cuda_works()
    # Hard-coded demo input on the author's machine.
    generate_subtitles(
        r"C:\Users\david\Videos\Social media\Cursos\Spring Security\Introduccion\Introducción Curso Spring Security Cero a Experto.mp4")

View File

@ -0,0 +1,58 @@
import re
import pyttsx3
def show_voices_available():
    """List every TTS voice the local pyttsx3 backend exposes."""
    engine = pyttsx3.init()
    for i, voice in enumerate(engine.getProperty('voices')):
        print(f"Voice {i}: {voice.name}, {voice.id}, {voice.languages}")
def read_srt_file_in_blocks(subtitles, block_size=500):
    """Yield *subtitles* in chunks of *block_size* sentences (split on '. ')."""
    sentences = subtitles.split(". ")
    for start in range(0, len(sentences), block_size):
        yield " ".join(sentences[start:start + block_size])
def process_in_blocks(engine, subtitles):
    """Queue the subtitles as numbered mp3 parts, one file per text block,
    then render them all in a single runAndWait pass."""
    for part_number, text_block in enumerate(read_srt_file_in_blocks(subtitles), start=1):
        audio_file = f"audio_part_{part_number}.mp3"
        engine.save_to_file(text_block, audio_file)
        print(f"Generating: {audio_file}")
    engine.runAndWait()
def process_all(engine, subtitles):
    """Render the whole subtitle text into a single audio.mp3 file."""
    engine.save_to_file(subtitles, "audio.mp3")
    engine.runAndWait()
def generate_voice(text_path):
    """Read the SRT file at *text_path*, strip cue numbers/timestamps and
    speak the remaining text into audio.mp3 via pyttsx3."""
    engine = pyttsx3.init()
    # Slow the speech rate slightly below the backend default.
    rate = engine.getProperty('rate')
    engine.setProperty('rate', rate - 50)
    engine.setProperty('volume', 0.9)
    voices = engine.getProperty('voices')
    # Voice index 1 — presumably the desired voice on the author's machine;
    # TODO confirm, voice ordering is platform-dependent.
    engine.setProperty('voice', voices[1].id)
    with open(text_path, "r") as file:
        content = file.read()
    # Capture only the cue text that follows each "start --> end" line,
    # optionally preceded by a numeric cue index.
    text = re.findall(
        r"(?<=\n\n)(?:\d+\n)?\d{2}:\d{2}:\d{2},\d{3} --> \d{2}:\d{2}:\d{2},\d{3}\n(.+?)(?=\n\n|\Z)", content,
        re.DOTALL)
    subtitles = " ".join(text)
    # process_in_blocks(engine, subtitles)
    process_all(engine, subtitles)
    print("Audio generated successfully!")
if __name__ == "__main__":
    # Expects subtitles.srt in the current working directory.
    generate_voice("subtitles.srt")

View File

@ -0,0 +1,32 @@
import pandas as pd
if __name__ == "__main__":
date_from = "2025-01-06"
date_to = "21-17-28"
print("""
Select some of the actions:
1.Get report from reviews
2.Get student list
3.Exit
""")
action = int(input())
file_path = ""
while action != 3:
match action:
case 1:
file_path = f"../resources/Udemy_Reviews_Export_{date_from}_{date_to}.csv"
case 2:
file_path = f"../resources/Students_List_Export_{date_from}_{date_to}.csv"
case 3:
exit(1)
case _:
print("Data does not match any specific condition")
df = pd.read_csv(file_path)
print(df.describe())