반응형
책에 나온 대로 아래처럼 하면
import pandas as pd
from urllib.request import urlopen
from bs4 import BeautifulSoup
from matplotlib import pyplot as plt
# Approach taken from the book: request the Naver Finance page with a bare
# urlopen() call (no browser headers) and read the last page number from the
# "pgRR" pagination cell.
# NOTE: this version fails with "'NoneType' object has no attribute 'a'" when
# the server rejects requests that carry no browser information.
url = 'https://finance.naver.com/item/sise_day.nhn?code=068270&page=1'
with urlopen(url) as doc:
    html = BeautifulSoup(doc, 'lxml')
    # Look up <td class="pgRR">, whose link is used to find the last page.
    pgrr = html.find('td', class_='pgRR')
    # The page number is whatever follows the final '=' in the link's href.
    href = str(pgrr.a['href'])
    last_page = href.split('=')[-1]
이렇게 나온다.
AttributeError: 'NoneType' object has no attribute 'a'
네이버에서는 무분별한 스크래핑을 막기 위해
HTTP 요청 헤더에 브라우저 정보(User-Agent)가 없으면 접근을 차단하고 있다.
차단된 응답에는 pgRR 셀이 없어서 html.find()가 None을 반환하고, 그래서 위 오류가 난다.
따라서 아래처럼 요청에 User-Agent 헤더를 넣어 주면 된다.
import pandas as pd
from matplotlib import pyplot as plt
from bs4 import BeautifulSoup
from urllib.request import Request, urlopen
# Fetch the first Naver Finance daily-price page for stock code 068270 and
# print the number of the last page, read from the "pgRR" pagination cell.
url = 'https://finance.naver.com/item/sise_day.nhn?code=068270&page=1'

# Send a browser-like User-Agent: requests without browser information are
# blocked, and the blocked response does not contain the pagination cell.
req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
with urlopen(req) as doc:
    # BeautifulSoup consumes the whole response here, so the connection can be
    # closed before the parsed tree is inspected.
    html = BeautifulSoup(doc, 'lxml')

# find() returns None when no matching tag exists; check explicitly so a
# blocked or changed page is reported clearly instead of surfacing as
# "'NoneType' object has no attribute 'a'".
pgrr = html.find('td', class_='pgRR')
if pgrr is None or pgrr.a is None:
    raise RuntimeError(
        "Could not find the last-page link (td.pgRR) in the response from "
        f"{url}; the request may have been blocked or the page layout changed."
    )

# The last page number is whatever follows the final '=' in the link's href.
s = str(pgrr.a['href']).split('=')
last_page = s[-1]
print(last_page)
반응형