wip
This commit is contained in:
parent
819bab2e5c
commit
fad7ec7aa8
|
@ -2,7 +2,7 @@
|
||||||
<module type="PYTHON_MODULE" version="4">
|
<module type="PYTHON_MODULE" version="4">
|
||||||
<component name="NewModuleRootManager">
|
<component name="NewModuleRootManager">
|
||||||
<content url="file://$MODULE_DIR$" />
|
<content url="file://$MODULE_DIR$" />
|
||||||
<orderEntry type="jdk" jdkName="Python 3.11" jdkType="Python SDK" />
|
<orderEntry type="inheritedJdk" />
|
||||||
<orderEntry type="sourceFolder" forTests="false" />
|
<orderEntry type="sourceFolder" forTests="false" />
|
||||||
</component>
|
</component>
|
||||||
</module>
|
</module>
|
|
@ -3,5 +3,5 @@
|
||||||
<component name="Black">
|
<component name="Black">
|
||||||
<option name="sdkName" value="Python 3.13" />
|
<option name="sdkName" value="Python 3.13" />
|
||||||
</component>
|
</component>
|
||||||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.11" project-jdk-type="Python SDK" />
|
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.13" project-jdk-type="Python SDK" />
|
||||||
</project>
|
</project>
|
|
@ -1,6 +1,6 @@
|
||||||
from ejercicios.paralelismo02.ej01.ej01 import MiScraping
|
from ejercicios.paralelismo02.ej01.ej01 import MiScraping
|
||||||
|
|
||||||
hilo = MiScraping("http://localhost/mi_web")
|
hilo = MiScraping("http://localhost:8000/portada.html")
|
||||||
hilo.start()
|
hilo.start()
|
||||||
hilo.join()
|
hilo.join()
|
||||||
links = hilo.get_links()
|
links = hilo.get_links()
|
||||||
|
|
|
@ -11,7 +11,7 @@ class MiScraping(threading.Thread):
|
||||||
def run(self):
|
def run(self):
|
||||||
response = requests.get(self.url)
|
response = requests.get(self.url)
|
||||||
soup = BeautifulSoup(response.text,"html.parser")
|
soup = BeautifulSoup(response.text,"html.parser")
|
||||||
self.links = [a['href'] for a in soup.find_all('a', href=True) if re.match(r'http[s]?://', a['href'])]
|
self.links = [a['href'] for a in soup.find_all('a', href=True)]
|
||||||
def get_links(self):
|
def get_links(self):
|
||||||
return self.links
|
return self.links
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue