🚩 PLAN
1. 7-Eleven 편의점 매장 정보 웹 페이지에서 파이썬을 통해 데이터를 가져온다.
2. 가져온 데이터를 DataFrame 으로 만들고, 로컬에 pickle 파일로 저장한다.
[ 대상 사이트 ]
먼저, 대상 사이트의 구조를 살펴보았다.
์ธ๋ธ์ผ๋ ๋ธ ๊ฐ์ ๊ฒฝ์ฐ, ๋ฉ์ธ ์น ํ์ด์ง์์ ์ ํฌ ์ฐพ๊ธฐ ๋ฒํผ์ ๋๋ฅด๋ฉด, ๋งค์ฅ ์ ๋ณด๋ฅผ ํ์ ํํ๋ก ์ ๊ณตํด์ฃผ๊ณ ์๋ค.
CU์ฒ๋ผ ์ง์ญ๋ณ๋ก ๊ฒ์ํด์ ๋ฐ์ดํฐ๋ฅผ ๊ฐ์ ธ์์ผ ํ๋ ํ์์ด๋ค. (์/๋ > ๊ตฌ/๊ตฐ)
๊ฐ๋ฐ์ ๋ชจ๋๋ฅผ ํจ ํ, ์์๋ก ์์ธ > ์ค๊ตฌ ๋ก ์ ํํ ํ, ๊ฒ์ ๋ฒํผ์ ํตํด ์ฐ๋ฆฌ๊ฐ ์ํ๋ ๋ฐ์ดํฐ๋ฅผ ํ์ธํ ์ ์๋ค.
payload ๋ฅผ ์ดํด๋ณด๋ฉฐ ๋ณ๊ฒฝํด์ฃผ์ด์ผ ํ key ๋ฅผ ํ์ธํด๋ณด๊ณ , Preview๋ฅผ ํตํด ์ด๋ค ์์ผ๋ก ์ ๋ณด๋ฅผ ๋ฐ์ ์ ์๋์ง ๋ฏธ๋ฆฌ ํ์ธํ ์ ์๋ค.
데이터를 요청해서 받게 되면, 위와 같이 html 형태로 정보를 주고 있다.
그리고 시/도, 시/군/구 정보는 api 를 통해, 오른쪽 이미지와 같이 확인하여 얻을 수 있다.
나는 이런 flow 로 접근하여 아래와 같은 코드로 작업했다.
⌨️ Code
import requests
import pandas as pd
import json
import pickle
from tqdm import tqdm
from bs4 import BeautifulSoup as BS
# Scrape every 7-Eleven store listed on the store-finder popup and pickle the
# result as a DataFrame.
# Flow: province (si/do) list -> district (si/gun/gu) list -> store search.
url = "https://www.7-eleven.co.kr/util/storeLayerPop.asp"
gugun_url = "https://www.7-eleven.co.kr/library/asp/StoreGetGugun.asp"

# Text found in the first <span> of a result item when a search has no hits.
NO_RESULT_TEXT = "검색 결과가 없습니다."
# Province that is searched with an empty district (it has no si/gun/gu level).
SEJONG = "세종"

# NOTE(review): no parser is passed to BeautifulSoup, so bs4 picks whichever
# parser is installed; pin one explicitly once the intended parser is known.
se = requests.get(url)
bs = BS(se.text)
sido = bs.select("#storeLaySido > option")
# The first <option> is skipped (presumably a placeholder entry - confirm).
sido_total = [x.string for x in sido[1:]]

# Form data for the district-list endpoint; 'Sido' is filled in per province.
gugun_pay = {
    "Sido": "",
    "selName": "storeLayGu",
}
# Form data for the store search. The province key is 'storeLaySido', the same
# key the loop below updates (it was previously declared as 'storeSido', which
# left a stale, always-empty field in every request).
payload = {
    "storeLaySido": "",
    "storeLayGu": "",
    "hiddentext": "none",
}

store_name = []
store_address = []
store_service = []


def _collect_stores(soup):
    """Append every store in one search-result page to the module-level lists.

    For each <li> under the result container this reads the store name from
    the first <span>, the address from the second <span>, and the ``alt`` text
    of every <img> inside the first <span> as the list of services.
    Parsing stops at the "no results" placeholder item.
    """
    # "list_stroe" (sic) is the class name this scraper has always queried;
    # presumably it mirrors a typo in the site's own markup.
    container = soup.find("div", class_="list_stroe")
    if container is None:
        # Unexpected response without a result list: nothing to collect.
        return
    for item in container.find_all("li"):
        spans = item.find_all("span")
        name = spans[0].text.strip()
        if name == NO_RESULT_TEXT:
            break
        store_name.append(name)
        store_address.append(spans[1].string.strip())
        store_service.append([img['alt'] for img in spans[0].find_all("img")])


for y in tqdm(sido_total):
    if y == SEJONG:
        # Sejong has no districts: search once with an empty district value.
        gugun_total = [""]
    else:
        gugun_pay['Sido'] = y
        se = requests.post(gugun_url, data=gugun_pay)
        bs = BS(se.text)
        # Again the first <option> is skipped.
        gugun_total = [z.string for z in bs.select("option")[1:]]

    payload['storeLaySido'] = y
    for k in tqdm(gugun_total):
        payload['storeLayGu'] = k
        se = requests.post(url, data=payload)
        _collect_stores(BS(se.text))

# Columns: store name, address, list of services.
store_df = pd.DataFrame()
store_df['매장명'] = store_name
store_df['주소'] = store_address
store_df['서비스'] = store_service
store_df.to_pickle("./7-ELEVEN_store_service.pkl")
๐ DataFrame
위 코드를 통해, 최종 DataFrame 을 아래와 같은 형태로 저장할 수 있다.
'Python > [웹 크롤링]' 카테고리의 다른 글
[편의점 크롤링] EMERT24 (0) | 2023.03.24 |
---|---|
[편의점 크롤링] MINISTOP (0) | 2023.03.24 |
[편의점 크롤링] CU (0) | 2023.03.24 |
[편의점 크롤링] GS25 (0) | 2023.03.24 |