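# Target page: https://shopping.naver.com/market/necessity/home
# Naver Shopping "Grocery" section.
# Tagline: smart shopping from marts and markets to department stores, without the legwork.
# Site: shopping.naver.com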
from selenium.webdriver.common.alert import Alert
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import pyperclip
import bs4
from urllib.request import urlopen
from bs4 import BeautifulSoup
from html_table_parser import parser_functions as parser
from pprint import pprint
import requests
import getpass
import urllib.request
import random
from time import sleep
import numpy as np
import matplotlib.pyplot as plt
import datetime
from datetime import datetime, timedelta
import time
# ---------------------------------------------------------------------------
# Crawl the entry list on the Naver Shopping page, collect one row per entry
# and bulk-insert the rows into the NVR_SHP_BRNDAY table.
# ---------------------------------------------------------------------------

CHROMEDRIVER_PATH = 'C:/chromedriver_win32/chromedriver.exe'
HOME_URL = "https://shopping.naver.com/market/necessity/home"

# Container that holds the entry list (<ul>) and the button below it.
LIST_CONTAINER_XPATH = '//*[@id="content"]/div[3]/div[2]/div[4]/div'
# Per-entry XPaths; "%s" is the 1-based position of the <li> in the list.
DATE_LABEL_XPATH = LIST_CONTAINER_XPATH + '/ul/li[%s]/a/strong'
BRAND_XPATH = LIST_CONTAINER_XPATH + '/ul/li[%s]/a/div/strong'
# NOTE(review): presumably a "show more" button -- confirm on the live page.
LIST_BUTTON_XPATH = LIST_CONTAINER_XPATH + '/button'

# Fields read from the page that opens after clicking an entry.
DETAIL_TITLE_XPATH = '//*[@id="content"]/div/div[2]/div[2]/h3'
DETAIL_SCHEDULE_XPATH = '//*[@id="content"]/div/div[2]/div[2]/div[1]/em'
DETAIL_TAG_XPATH = '//*[@id="content"]/div/div[2]/div[2]/div[2]'

# Column order of the result frame; it is also the column order of the INSERT.
COLUMNS = (
    'crawled_date',
    'event_date',
    'brand',
    'crawl_flag',
    'title_click',
    'schedule_click',
    'tag_click',
    'click_flag',
)

NULL = 'null'          # placeholder stored when a value could not be read
SUCCESS = 'S'
FAILURE = 'F'
TODAY_LABEL = 'TODAY'  # date label of the entry that gets clicked for details

# Connection settings. The values are placeholders and must be replaced
# before the script can reach a real server.
# NOTE(review): consider loading these from the environment instead of
# keeping credentials in the source file.
DB_SERVER = '서버'
DB_NAME = '데이터베이스명'
DB_USERNAME = '아이디'
DB_PASSWORD = '패스워드'
TABLE_NAME = 'NVR_SHP_BRNDAY'


def build_event_date(label, year, today):
    """Turn an entry's date label into a ``YYYY-MM-DD`` style string.

    Args:
        label: Text of the date label, e.g. ``'05/12 ...'`` or ``'TODAY'``.
        year: Four-digit year (string) prefixed to a month/day label.
        today: Date string returned when the label is ``'TODAY'``.

    Returns:
        ``today`` for a ``'TODAY'`` label, otherwise ``year`` joined to the
        first space-separated token of the label with ``/`` replaced by ``-``.
    """
    token = label.split(' ')[0].replace('/', '-')
    # Compare the token itself rather than a hard-coded '2021-TODAY' so the
    # check keeps working in any year.
    if token == TODAY_LABEL:
        return today
    # NOTE(review): the label carries no year, so entries that fall in the
    # next calendar year would be stamped with the current one.
    return str(year) + '-' + token


def count_listed_items(list_text):
    """Estimate how many entries the list shows from its rendered text.

    Each entry is assumed to contribute two text lines, so the count is half
    the number of lines, rounded down.
    NOTE(review): this keeps the original heuristic; counting the ``<li>``
    elements would be sturdier if the markup allows it.
    """
    line_count = list_text.count('\n') + 1
    return line_count // 2


def build_insert_sql(columns):
    """Return the parameterised INSERT statement for the given column names."""
    column_sql = ', '.join(str(name).upper() for name in columns)
    marker_sql = ', '.join('%s' for _ in columns)
    return ('insert into ' + TABLE_NAME + ' (' + column_sql + ') values ('
            + marker_sql + ');')


def rows_to_frame(rows):
    """Build the result DataFrame (fixed column order) from row dicts."""
    return pd.DataFrame(rows, columns=list(COLUMNS))


def text_or_null(driver, xpath):
    """Return the text of the element at *xpath*, or ``'null'`` on failure."""
    from selenium.common.exceptions import WebDriverException

    try:
        return driver.find_element_by_xpath(xpath).text
    except WebDriverException:
        return NULL


def crawl_item(driver, index, crawled_date, year):
    """Scrape the list entry at 1-based position *index* into one row dict.

    For the entry labelled ``'TODAY'`` the entry is clicked, the title,
    schedule and hashtag text are read from the page that opens, and the
    browser navigates back. For every entry the event date and brand name
    are read from the list.

    ``crawl_flag`` is ``'S'`` when the list fields were read, else ``'F'``.
    ``click_flag`` reports the click for the ``'TODAY'`` entry and mirrors
    ``crawl_flag`` for all other entries.
    """
    from selenium.common.exceptions import WebDriverException

    row = dict.fromkeys(COLUMNS, NULL)
    row['crawl_flag'] = FAILURE
    row['click_flag'] = FAILURE

    label_xpath = DATE_LABEL_XPATH % index
    try:
        label = driver.find_element_by_xpath(label_xpath).text
    except WebDriverException:
        # Entry missing or unreadable: keep the all-'null' failure row.
        return row

    is_today = label == TODAY_LABEL
    clicked = False
    if is_today:
        try:
            driver.find_element_by_xpath(label_xpath).click()
        except WebDriverException:
            clicked = False
        else:
            clicked = True
            row['title_click'] = text_or_null(driver, DETAIL_TITLE_XPATH)
            row['schedule_click'] = text_or_null(driver, DETAIL_SCHEDULE_XPATH)
            row['tag_click'] = text_or_null(driver, DETAIL_TAG_XPATH)
            # Return to the list so the remaining entries can be read.
            driver.back()

    try:
        brand = driver.find_element_by_xpath(BRAND_XPATH % index).text
    except WebDriverException:
        crawled = False
    else:
        crawled = True
        row['crawled_date'] = crawled_date
        row['event_date'] = build_event_date(label, year, crawled_date)
        row['brand'] = brand

    row['crawl_flag'] = SUCCESS if crawled else FAILURE
    click_ok = clicked if is_today else crawled
    row['click_flag'] = SUCCESS if click_ok else FAILURE
    return row


def crawl_listed_items(driver, crawled_date, year):
    """Scrape every entry currently shown in the first list of the container."""
    container = driver.find_element_by_xpath(LIST_CONTAINER_XPATH)
    lists = container.find_elements_by_tag_name('ul')
    if not lists:
        return []

    total = count_listed_items(lists[0].text)
    rows = []
    for index in range(1, total + 1):
        rows.append(crawl_item(driver, index, crawled_date, year))
    return rows


def save_rows(frame):
    """Insert every row of *frame* into the target table and commit."""
    import pymssql

    if frame.empty:
        return

    sql = build_insert_sql(frame.columns)
    params = list(frame.itertuples(index=False, name=None))

    cnxn = pymssql.connect(DB_SERVER, DB_USERNAME, DB_PASSWORD, DB_NAME)
    try:
        cursor = cnxn.cursor()
        cursor.executemany(sql, params)
        cnxn.commit()
    finally:
        # Release the connection even when the insert fails.
        cnxn.close()


def main():
    """Run the crawl, store the rows and return them as a DataFrame."""
    driver = webdriver.Chrome(CHROMEDRIVER_PATH)
    try:
        driver.get(HOME_URL)
        driver.maximize_window()

        now = datetime.now()
        crawled_date = now.strftime('%Y-%m-%d')
        year = now.strftime('%Y')

        rows = crawl_listed_items(driver, crawled_date, year)

        # find_elements_* returns an empty list when nothing matches, so a
        # missing button skips the second pass instead of raising.
        buttons = driver.find_elements_by_xpath(LIST_BUTTON_XPATH)
        if buttons:
            buttons[0].click()
            # NOTE(review): as in the original, the second pass starts again
            # at the first entry; if the button only extends the list, the
            # first-pass entries are stored twice -- confirm intended.
            rows.extend(crawl_listed_items(driver, crawled_date, year))

        frame = rows_to_frame(rows)
        save_rows(frame)
        time.sleep(3)
        return frame
    finally:
        # Always shut the browser down, even after a failure.
        driver.quit()


if __name__ == "__main__":
    shop_dict = main()