Python para enfermeiras (24)¶
Web Scraping (Data Mining)¶
In [1]:
# Biblioteca usada para requisitar uma página de um web site
import urllib.request
In [7]:
# Target URL: the home page of our journal, the Journal of Specialized Nursing Care (JSNC).
# Before scraping, check what the site allows at http://www.jsncare.uff.br/robots.txt
# The timeout (in seconds) keeps this cell from hanging forever if the server does not answer.
with urllib.request.urlopen("http://www.jsncare.uff.br/", timeout=30) as url:
    # read() returns the raw response body as bytes (not yet decoded into text)
    page = url.read()
In [8]:
# Print the content of "page" (the site's index / root page).
# "page" holds raw bytes, so the output is shown as a bytes literal (b'...') with escape sequences.
print(page)
b'\n<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"\n\t"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">\n<html xmlns="http://www.w3.org/1999/xhtml" lang="en-US" xml:lang="en-US">\n<head>\n\t<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />\n\t<title>Journal of Specialized Nursing Care</title>\n\t<meta name="description" content="" />\n\t<meta name="keywords" content="" />\n\t<meta name="generator" content="Open Journal Systems 2.4.8.2" />\n\t\n\t\t<link rel="stylesheet" href="http://www.jsncare.uff.br/lib/pkp/styles/pkp.css" type="text/css" />\n\t<link rel="stylesheet" href="http://www.jsncare.uff.br/lib/pkp/styles/common.css" type="text/css" />\n\t<link rel="stylesheet" href="http://www.jsncare.uff.br/styles/common.css" type="text/css" />\n\t<link rel="stylesheet" href="http://www.jsncare.uff.br/styles/compiled.css" type="text/css" />\n\n\t<!-- Base Jquery -->\n\t<script type="text/javascript" src="//www.google.com/jsapi"></script>\n\t\t<script type="text/javascript">\n\t\t\t<!--\n\t\t\t// Provide a local fallback if the CDN cannot be reached\n\t\t\tif (typeof google == \'undefined\') {\n\t\t\t\tdocument.write(unescape("%3Cscript src=\'http://www.jsncare.uff.br/lib/pkp/js/lib/jquery/jquery.min.js\' type=\'text/javascript\'%3E%3C/script%3E"));\n\t\t\t\tdocument.write(unescape("%3Cscript src=\'http://www.jsncare.uff.br/lib/pkp/js/lib/jquery/plugins/jqueryUi.min.js\' type=\'text/javascript\'%3E%3C/script%3E"));\n\t\t\t} else {\n\t\t\t\tgoogle.load("jquery", "1.4.4");\n\t\t\t\tgoogle.load("jqueryui", "1.8.6");\n\t\t\t}\n\t\t\t// -->\n\t\t</script>\n\t\n\t\n\t\n\t<link rel="stylesheet" href="http://www.jsncare.uff.br/styles/sidebar.css" type="text/css" />\t\t<link rel="stylesheet" href="http://www.jsncare.uff.br/styles/rightSidebar.css" type="text/css" />\t\n\t\t\t<link rel="stylesheet" href="http://www.jsncare.uff.br/plugins/blocks/languageToggle/styles/languageToggle.css" type="text/css" />\n\t\n\t<!-- Default global 
locale keys for JavaScript -->\n\t\n<script type="text/javascript">\n\tjQuery.pkp = jQuery.pkp || { };\n\tjQuery.pkp.locale = { };\n\t\t\t\n\t\t\t\tjQuery.pkp.locale.form_dataHasChanged = \'The data on this form has changed. Continue anyway?\';\n\t</script>\n\t<!-- Compiled scripts -->\n\t\t\t\n<script type="text/javascript" src="http://www.jsncare.uff.br/lib/pkp/js/lib/jquery/plugins/jquery.tag-it.js"></script>\n<script type="text/javascript" src="http://www.jsncare.uff.br/lib/pkp/js/lib/jquery/plugins/jquery.cookie.js"></script>\n\n<script type="text/javascript" src="http://www.jsncare.uff.br/lib/pkp/js/functions/fontController.js"></script>\n<script type="text/javascript" src="http://www.jsncare.uff.br/lib/pkp/js/functions/general.js"></script>\n<script type="text/javascript" src="http://www.jsncare.uff.br/lib/pkp/js/functions/jqueryValidatorI18n.js"></script>\n\n<script type="text/javascript" src="http://www.jsncare.uff.br/lib/pkp/js/classes/Helper.js"></script>\n<script type="text/javascript" src="http://www.jsncare.uff.br/lib/pkp/js/classes/ObjectProxy.js"></script>\n<script type="text/javascript" src="http://www.jsncare.uff.br/lib/pkp/js/classes/Handler.js"></script>\n<script type="text/javascript" src="http://www.jsncare.uff.br/lib/pkp/js/classes/linkAction/LinkActionRequest.js"></script>\n<script type="text/javascript" src="http://www.jsncare.uff.br/lib/pkp/js/classes/features/Feature.js"></script>\n\n<script type="text/javascript" src="http://www.jsncare.uff.br/lib/pkp/js/controllers/SiteHandler.js"></script><!-- Included only for namespace definition -->\n<script type="text/javascript" src="http://www.jsncare.uff.br/lib/pkp/js/controllers/UrlInDivHandler.js"></script>\n<script type="text/javascript" src="http://www.jsncare.uff.br/lib/pkp/js/controllers/AutocompleteHandler.js"></script>\n<script type="text/javascript" src="http://www.jsncare.uff.br/lib/pkp/js/controllers/ExtrasOnDemandHandler.js"></script>\n<script type="text/javascript" 
src="http://www.jsncare.uff.br/lib/pkp/js/controllers/form/FormHandler.js"></script>\n<script type="text/javascript" src="http://www.jsncare.uff.br/lib/pkp/js/controllers/form/AjaxFormHandler.js"></script>\n<script type="text/javascript" src="http://www.jsncare.uff.br/lib/pkp/js/controllers/form/ClientFormHandler.js"></script>\n<script type="text/javascript" src="http://www.jsncare.uff.br/lib/pkp/js/controllers/grid/GridHandler.js"></script>\n<script type="text/javascript" src="http://www.jsncare.uff.br/lib/pkp/js/controllers/linkAction/LinkActionHandler.js"></script>\n\n<script type="text/javascript" src="http://www.jsncare.uff.br/js/pages/search/SearchFormHandler.js"></script>\n<script type="text/javascript" src="http://www.jsncare.uff.br/js/statistics/ReportGeneratorFormHandler.js"></script>\n<script type="text/javascript" src="http://www.jsncare.uff.br/plugins/generic/lucene/js/LuceneAutocompleteHandler.js"></script>\n\n<script type="text/javascript" src="http://www.jsncare.uff.br/lib/pkp/js/lib/jquery/plugins/jquery.pkp.js"></script>\t\n\t<!-- Form validation -->\n\t<script type="text/javascript" src="http://www.jsncare.uff.br/lib/pkp/js/lib/jquery/plugins/validate/jquery.validate.js"></script>\n\t<script type="text/javascript">\n\t\t<!--\n\t\t// initialise plugins\n\t\t\n\t\t$(function(){\n\t\t\tjqueryValidatorI18n("http://www.jsncare.uff.br", "en_US"); // include the appropriate validation localization\n\t\t\t\n\t\t\t$(".tagit").live(\'click\', function() {\n\t\t\t\t$(this).find(\'input\').focus();\n\t\t\t});\n\t\t});\n\t\t// -->\n\t\t\n\t</script>\n\n\t\t\n</head>\n<body id="pkp-common-openJournalSystems">\n<div id="container">\n\n<div id="header">\n<div id="headerTitle">\n<h1>\n\tJournal of Specialized Nursing Care\n</h1>\n</div>\n</div>\n\n<div id="body">\n\n\t<div id="sidebar">\n\t\t\t\t\t\t\t<div id="rightSidebar">\n\t\t\t\t<div class="block" id="sidebarDevelopedBy">\n\t<a class="blockTitle" href="http://pkp.sfu.ca/ojs/" id="developedBy">Open Journal 
Systems</a>\n</div><div class="block" id="sidebarHelp">\n\t<a class="blockTitle" href="javascript:openHelp(\'http://www.jsncare.uff.br/index.php/index/help/view/user/topic/000001\')">Journal Help</a>\n</div><div class="block" id="sidebarUser">\n\t\t\t<span class="blockTitle">User</span>\n\t\n\t\t\t\t\t\t\t\t\t\t\t\t<form method="post" action="http://www.jsncare.uff.br/index.php/index/login/signIn">\n\t\t\t\t\t<table>\n\t\t\t\t\t\t<tr>\n\t\t\t\t\t\t\t<td><label for="sidebar-username">Username</label></td>\n\t\t\t\t\t\t\t<td><input type="text" id="sidebar-username" name="username" value="" size="12" maxlength="32" class="textField" /></td>\n\t\t\t\t\t\t</tr>\n\t\t\t\t\t\t<tr>\n\t\t\t\t\t\t\t<td><label for="sidebar-password">Password</label></td>\n\t\t\t\t\t\t\t<td><input type="password" id="sidebar-password" name="password" value="" size="12" class="textField" /></td>\n\t\t\t\t\t\t</tr>\n\t\t\t\t\t\t<tr>\n\t\t\t\t\t\t\t<td colspan="2"><input type="checkbox" id="remember" name="remember" value="1" /> <label for="remember">Remember me</label></td>\n\t\t\t\t\t\t</tr>\n\t\t\t\t\t\t<tr>\n\t\t\t\t\t\t\t<td colspan="2"><input type="submit" value="Login" class="button" /></td>\n\t\t\t\t\t\t</tr>\n\t\t\t\t\t</table>\n\t\t\t\t</form>\n\t\t\t\t\t\t</div> \n<div class="block" id="sidebarLanguageToggle">\n\t<script type="text/javascript">\n\t\t<!--\n\t\tfunction changeLanguage() {\n\t\t\tvar e = document.getElementById(\'languageSelect\');\n\t\t\tvar new_locale = e.options[e.selectedIndex].value;\n\n\t\t\tvar redirect_url = \'http://www.jsncare.uff.br/index.php/index/user/setLocale/NEW_LOCALE?source=%2F\';\n\t\t\tredirect_url = redirect_url.replace("NEW_LOCALE", new_locale);\n\n\t\t\twindow.location.href = redirect_url;\n\t\t}\n\t\t//-->\n\t</script>\n\t<span class="blockTitle">Language</span>\n\t<form action="#">\n\t\t<label for="languageSelect">Select Language</label>\n\t\t<select id="languageSelect" size="1" name="locale" class="selectMenu"><option label="English" 
value="en_US" selected="selected">English</option>\n<option label="Espa\xc3\xb1ol (Espa\xc3\xb1a)" value="es_ES">Espa\xc3\xb1ol (Espa\xc3\xb1a)</option>\n<option label="Portugu\xc3\xaas (Brasil)" value="pt_BR">Portugu\xc3\xaas (Brasil)</option>\n</select>\n\t\t<input type="submit" class="button" value="Submit" onclick="changeLanguage(); return false;" />\n\t</form>\n</div>\n<div class="block" id="sidebarNavigation">\n\t<span class="blockTitle">Journal Content</span>\n\n\t\n\t\n\t<form id="simplesearchForm" action="http://www.jsncare.uff.br/index.php/index">\n\t\t\t\t<table id="simpleSearchInput">\n\t\t\t<tr>\n\t\t\t\t<td>\n\t\t\t\t\t\t\t\t\t\t\t\t\t<label for="simpleQuery">Search <br />\n\t\t\t\t\t<input type="text" id="simpleQuery" name="simpleQuery" size="15" maxlength="255" value="" class="textField" /></label>\n\t\t\t\t\t\t\t\t</td>\n\t\t\t</tr>\n\t\t\t<tr>\n\t\t\t\t<td><label for="searchField">\n\t\t\t\tSearch Scope\n\t\t\t\t<br />\n\t\t\t\t<select id="searchField" name="searchField" size="1" class="selectMenu">\n\t\t\t\t\t<option label="All" value="query">All</option>\n<option label="Authors" value="authors">Authors</option>\n<option label="Title" value="title">Title</option>\n<option label="Abstract" value="abstract">Abstract</option>\n<option label="Index terms" value="indexTerms">Index terms</option>\n<option label="Full Text" value="galleyFullText">Full Text</option>\n\n\t\t\t\t</select></label>\n\t\t\t\t</td>\n\t\t\t</tr>\n\t\t\t<tr>\n\t\t\t\t<td><input type="submit" value="Search" class="button" /></td>\n\t\t\t</tr>\n\t\t</table>\n\t</form>\n\n\t<br />\n\n\t</div>\n\n<!-- Add javascript required for font sizer -->\n<script type="text/javascript">\n\t<!--\n\t$(function(){\n\t\tfontSize("#sizer", "body", 9, 16, 32, ""); // Initialize the font sizer\n\t});\n\t// -->\n</script>\n\n<div class="block" id="sidebarFontSize" style="margin-bottom: 4px;">\n\t<span class="blockTitle">Font Size</span>\n\t<div id="sizer"></div>\n</div>\n<br 
/>\n\t\t\t</div>\n\t\t\t</div>\n\n<div id="main">\n<div id="navbar">\n\t<ul class="menu">\n\t\t<li id="home"><a href="http://www.jsncare.uff.br/index.php/index/index">Home</a></li>\n\t\t<li id="about"><a href="http://www.jsncare.uff.br/index.php/index/about">About</a></li>\n\n\t\t\t\t\t<li id="login"><a href="http://www.jsncare.uff.br/index.php/index/login">Login</a></li>\n\t\t\t\t\t\t\t<li id="register"><a href="http://www.jsncare.uff.br/index.php/index/user/register">Register</a></li>\n\t\t\t\t\t\t\t\t\t\t\t\t<li id="search"><a href="http://www.jsncare.uff.br/index.php/index/search">Search</a></li>\n\t\t\n\t\t\n\t\t\t\t\n\n\t\t\t</ul>\n</div>\n<div id="breadcrumb">\n\t<a href="http://www.jsncare.uff.br/index.php/index/index">Home</a> >\n\t\t\t<a href="http://www.jsncare.uff.br/index.php" class="current">Journal of Specialized Nursing Care</a></div>\n\n<h2>Journal of Specialized Nursing Care</h2>\n\n\n<div id="content">\n\n\n<br />\n\n<div id="intro"><span style="font-size: 14pt; font-family: Verdana;"><span style="font-size: 10pt; font-family: Verdana;">a peer-review journal related to the specialized knowledge, skills, and experience identified by a nursing specialty to promote optimal health outcomes.</span></span></div>\n<a name="journals"></a>\n\n\n\t\t\t\t\t<div style="clear:left;">\n\t\t\t\t</div>\n\t\t\t\t<h3>Journal of Specialized Nursing Care</h3>\n\t\t\t\t\t\t\t<div class="journalDescription" id="journalDescription-2">\n\t\t\t\ta peer-review journal related to the specialized knowledge, skills, and experience identified by a nursing specialty to promote optimal health outcomes.\n\t\t\t</div>\n\t\t\t\t<p><a href="http://www.jsncare.uff.br/index.php/jsncare" class="action">View Journal</a> | <a href="http://www.jsncare.uff.br/index.php/jsncare/issue/current" class="action">Current Issue</a> | <a href="http://www.jsncare.uff.br/index.php/jsncare/user/register" class="action">Register</a></p>\n\t\t\t\t\t<div 
style="clear:left;">\n\t\t\t\t</div>\n\t\t\t\t<h3>Boletim NEPAE-NESEN</h3>\n\t\t\t\t\t\t\t<div class="journalDescription" id="journalDescription-3">\n\t\t\t\t<p class="MsoNormal" style="margin: 0cm 0cm 0pt; mso-pagination: none;"><span style="font-family: Verdana;"><span style="font-size: x-small;">O <strong>Boletim Eletr\xc3\xb4nico NEPAE-NESEN (BNN) </strong>\xc3\xa9 uma publica\xc3\xa7\xc3\xa3o informativa do<span style="mso-spacerun: yes;">\xc2\xa0 </span>N\xc3\xbacleo de Estudos e Pesquisas sobre as Atividades de Enfermagem (NEPAE) e do N\xc3\xbacleo de Estudos sobre Sa\xc3\xbade e Etnia Negra (NESEN), da </span></span></p><p class="MsoNormal" style="margin: 0cm 0cm 0pt; mso-pagination: none;"><span style="font-family: Verdana;"><span style="font-size: x-small;">Universidade Federal Fluminense (UFF).\xc2\xa0</span></span></p><p class="MsoNormal" style="margin: 0cm 0cm 3pt; mso-pagination: none;"><span style="font-family: Verdana;"><span style="font-size: x-small;">O BNN tem como miss\xc3\xa3o ampliar a rede de contatos entre a sociedade civil organizada e os estudantes e profissionais de sa\xc3\xbade, em especial os de enfermagem.</span></span><!-- /* Font Definitions */ @font-face \t{font-family:Verdana; \tpanose-1:2 11 6 4 3 5 4 4 2 4; \tmso-font-charset:0; \tmso-generic-font-family:swiss; \tmso-font-pitch:variable; \tmso-font-signature:536871559 0 0 0 415 0;} /* Style Definitions */ p.MsoNormal, li.MsoNormal, div.MsoNormal \t{mso-style-parent:""; \tmargin:0cm; \tmargin-bottom:.0001pt; \tmso-pagination:widow-orphan; \tfont-size:12.0pt; \tfont-family:"Times New Roman"; \tmso-fareast-font-family:"Times New Roman";} @page Section1 \t{size:612.0pt 792.0pt; \tmargin:70.85pt 3.0cm 70.85pt 3.0cm; \tmso-header-margin:36.0pt; \tmso-footer-margin:36.0pt; \tmso-paper-source:0;} div.Section1 \t{page:Section1;} --><!--[if gte mso 10]> <mce:style><! 
/* Style Definitions */ table.MsoNormalTable \t{mso-style-name:"Tabela normal"; \tmso-tstyle-rowband-size:0; \tmso-tstyle-colband-size:0; \tmso-style-noshow:yes; \tmso-style-parent:""; \tmso-padding-alt:0cm 5.4pt 0cm 5.4pt; \tmso-para-margin:0cm; \tmso-para-margin-bottom:.0001pt; \tmso-pagination:widow-orphan; \tfont-size:10.0pt; \tfont-family:"Times New Roman"; \tmso-ansi-language:#0400; \tmso-fareast-language:#0400; \tmso-bidi-language:#0400;} > <! [endif] ></p><p class="MsoNormal"><span style="font-family: Verdana;" mce_style="font-family: Verdana;">\xc3\x89 uma publica\xc3\xa7\xc3\xa3o informativa do\xc2\xa0 N\xc3\xbacleo de Estudos e Pesquisas sobre as Atividades de Enfermagem (NEPAE) e do N\xc3\xbacleo de Estudos sobre Sa\xc3\xbade e Etnia Negra (NESEN), da </span>Universidade Federal Fluminense (UFF).</p> <p class="MsoNormal"><span style="font-family: Verdana;" mce_style="font-family: Verdana;">O BNN tem como miss\xc3\xa3o ampliar a rede de contatos entre a sociedade civil organizada e os estudantes e profissionais de sa\xc3\xbade, em especial os de enfermagem.</span></p>\xc2\xa0 <span><! [endif] ></span><p>\xc2\xa0< ><--></p>\n\t\t\t</div>\n\t\t\t\t<p><a href="http://www.jsncare.uff.br/index.php/bnn" class="action">View Journal</a> | <a href="http://www.jsncare.uff.br/index.php/bnn/issue/current" class="action">Current Issue</a> | <a href="http://www.jsncare.uff.br/index.php/bnn/user/register" class="action">Register</a></p>\n\n<div id="journalListPageInfo"></div>\n<div id="journalListPageLinks"></div>\n\n\n</div><!-- content -->\n</div><!-- main -->\n</div><!-- body -->\n\n\n\n</div><!-- container -->\n</body>\n</html>'
In [9]:
from bs4 import BeautifulSoup
In [10]:
# Parse the HTML held in the 'page' variable with the "html.parser" parser
# and keep the result as a BeautifulSoup object.
soup = BeautifulSoup(page, "html.parser")
In [11]:
# The page's <title> element (tag included)
soup.title
Out[11]:
<title>Journal of Specialized Nursing Care</title>
In [12]:
# Only the text inside the <title> element
soup.title.string
Out[12]:
'Journal of Specialized Nursing Care'
In [16]:
# First hyperlink (<a> tag) of the "page": the first one in the document, not necessarily the main link
soup.a
Out[16]:
<a class="blockTitle" href="http://pkp.sfu.ca/ojs/" id="developedBy">Open Journal Systems</a>
In [17]:
# Every hyperlink (<a> tag) of the "page"
soup.find_all("a")
Out[17]:
[<a class="blockTitle" href="http://pkp.sfu.ca/ojs/" id="developedBy">Open Journal Systems</a>, <a class="blockTitle" href="javascript:openHelp('http://www.jsncare.uff.br/index.php/index/help/view/user/topic/000001')">Journal Help</a>, <a href="http://www.jsncare.uff.br/index.php/index/index">Home</a>, <a href="http://www.jsncare.uff.br/index.php/index/about">About</a>, <a href="http://www.jsncare.uff.br/index.php/index/login">Login</a>, <a href="http://www.jsncare.uff.br/index.php/index/user/register">Register</a>, <a href="http://www.jsncare.uff.br/index.php/index/search">Search</a>, <a href="http://www.jsncare.uff.br/index.php/index/index">Home</a>, <a class="current" href="http://www.jsncare.uff.br/index.php">Journal of Specialized Nursing Care</a>, <a name="journals"></a>, <a class="action" href="http://www.jsncare.uff.br/index.php/jsncare">View Journal</a>, <a class="action" href="http://www.jsncare.uff.br/index.php/jsncare/issue/current">Current Issue</a>, <a class="action" href="http://www.jsncare.uff.br/index.php/jsncare/user/register">Register</a>, <a class="action" href="http://www.jsncare.uff.br/index.php/bnn">View Journal</a>, <a class="action" href="http://www.jsncare.uff.br/index.php/bnn/issue/current">Current Issue</a>, <a class="action" href="http://www.jsncare.uff.br/index.php/bnn/user/register">Register</a>]
In [18]:
# Despite the plural name, find() returns a single element: the first <table> in the page
tables = soup.find('table')
In [19]:
# Show the HTML of the table stored in "tables"
print(tables)
<table> <tr> <td><label for="sidebar-username">Username</label></td> <td><input class="textField" id="sidebar-username" maxlength="32" name="username" size="12" type="text" value=""/></td> </tr> <tr> <td><label for="sidebar-password">Password</label></td> <td><input class="textField" id="sidebar-password" name="password" size="12" type="password" value=""/></td> </tr> <tr> <td colspan="2"><input id="remember" name="remember" type="checkbox" value="1"/> <label for="remember">Remember me</label></td> </tr> <tr> <td colspan="2"><input class="button" type="submit" value="Login"/></td> </tr> </table>
Busca com expressões regulares¶
In [20]:
# Importando o módulo re (regular expression) para manipulação do texto
# Esse módulo fornece operações com expressões regulares (ER)
import re
In [21]:
# Search terms to look for
lista_pesquisa = ['nursing', 'patient']
In [22]:
# Text to be searched
texto = 'ICNP(2019)/SNOMED CT: nursing terminologies in intensive patient care'
In [23]:
# Basic data-mining example: report whether each search term occurs in the text
for item in lista_pesquisa:
    print('Buscando por "%s" em: \n\n"%s"' % (item, texto))
    # re.search treats the term as a regular expression and looks for it anywhere in the text
    if re.search(item, texto):
        print('\n')
        print('Palavra encontrada. \n')
        print('\n')
    else:
        print('\n')
        print('Palavra não encontrada.')
        print('\n')
Buscando por "nursing" em: "ICNP(2019)/SNOMED CT: nursing terminologies in intensive patient care" Palavra encontrada. Buscando por "patient" em: "ICNP(2019)/SNOMED CT: nursing terminologies in intensive patient care" Palavra encontrada.
In [24]:
def encontra_padrao(lista, frase):
    """Print every match of each regular-expression pattern found in a sentence.

    Args:
        lista: iterable of regular-expression patterns (strings).
        frase: text in which each pattern is searched.

    Returns:
        None. For each pattern, prints the pattern, then the list returned by
        re.findall for that pattern, then a blank-line separator.
    """
    for item in lista:
        # NOTE: the label reads "na frase", but the value shown is the pattern being searched
        print('Pesquisando na frase: %r' % item)
        # findall returns all non-overlapping matches of the pattern as a list
        print(re.findall(item, frase))
        print('\n')
In [33]:
# Sentence source: http://www.revenfermeria.sld.cu/index.php/enf/article/view/1604/352
# Adjacent string literals are joined by Python into one single-line sentence.
frase_padrao = (
    'O Diagnóstico de Enfermagem: risco de dignidade humana comprometida '
    'tem como fatores de risco a humilhação percebida e invasão percebida da privacidade, '
    'convergentes em uma perda de respeito e honra na sociedade.'
)
# Patterns to search for. The quantifiers "*" (zero or more) and "+" (one or more)
# apply only to the character right before them: the final "e" and the final "o".
lista_padroes = ['dignidade*', 'respeito+']
In [34]:
# Search the sentence for each pattern and print the matches found
encontra_padrao(lista_padroes, frase_padrao)
Pesquisando na frase: 'dignidade*' ['dignidade'] Pesquisando na frase: 'respeito+' ['respeito']
Apontamentos
- Não há apontamentos.
BNN - ISSN 1676-4893
Boletim do Núcleo de Estudos e Pesquisas sobre as Atividades de Enfermagem (NEPAE) e do Núcleo de Estudos sobre Saúde e Etnia Negra (NESEN).