Within the beneath code, “self.htmlList” record is printing the physique / phrases a number of occasions and I can not take out final record worth.
from urllib.request import urlopen, Request
from html.parser import HTMLParser
class MyHTMLParser(HTMLParser):
def __init__(self):
HTMLParser.__init__(self)
self.htmlList = []
self.wordDict = {}
def handle_data(self, knowledge):
phrases = knowledge.strip().break up()
for i in phrases:
if i.isalpha() == True: # Not Working
self.htmlList.append(i)
return self.htmlList
self.handleContent(self.htmlList)
def handleContent(self,knowledge):
for phrase in knowledge:
if phrase in self.wordDict:
self.wordDict[word] += 1
else:
self.wordDict[word] = 1
print(self.wordDict)
#return self.wordDict
if identify == “major“:
url = ‘http://www.shortreckonings.com’
headers={‘Person-Agent’: ‘Mozilla/5.0 (Home windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36’}
response = urlopen(Request(url,headers=headers))
htmlContent = response.learn()
htmlContent = htmlContent.decode(“utf-8”)
response.shut()
parser = MyHTMLParser()
parser.feed(htmlContent)