본문 바로가기

카테고리 없음

[python] 네이버 브랜드장보기 데이 크롤링

https://shopping.naver.com/market/necessity/home

 

네이버쇼핑 장보기

마트에서 시장 백화점까지 발품없는 현명한 쇼핑

shopping.naver.com

 

from selenium.webdriver.common.alert import Alert
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.keys import Keys 
import pyperclip
import bs4
from urllib.request import urlopen
from bs4 import BeautifulSoup
from html_table_parser import parser_functions as parser
from pprint import pprint
import requests
import getpass
import urllib.request
import random
from time import sleep
import numpy as np
import matplotlib.pyplot as plt
import datetime
from datetime import datetime, timedelta
import time


# Launch Chrome through a local chromedriver binary and open the Naver
# grocery-shopping home page that the statements below scrape.
# NOTE(review): chrome_options is created here but is not passed to
# webdriver.Chrome below, so it currently has no effect.
chrome_options = webdriver.ChromeOptions()
driver = webdriver.Chrome('C:/chromedriver_win32/chromedriver.exe')
driver.get("https://shopping.naver.com/market/necessity/home")
driver.maximize_window()

# Timestamp of this run, formatted once as a full date and as the year only.
now = datetime.now()
nowDate = now.strftime('%Y-%m-%d')
nowYear = now.strftime('%Y')

# Container element located by absolute XPath; only its first <ul> is used.
raw_info = driver.find_element_by_xpath('//*[@id="content"]/div[3]/div[2]/div[4]/div')
a = raw_info.find_elements_by_tag_name('ul')[0]
# b is the exclusive upper bound for range(1, b): the number of text lines in
# the list (newline count + 1), halved and truncated, plus one.
# NOTE(review): presumably each list item renders as two text lines (date and
# brand), making b - 1 the item count -- confirm against the live page.
b = int((a.text.count('\n') + 1)/2)+1

# Column-oriented accumulator: each key starts with its own empty list that
# receives one value per scraped item.
shop_dict = {
    column: []
    for column in (
        'crawled_date',
        'event_date',
        'brand',
        'crawl_flag',
        'title_click',
        'schedule_click',
        'tag_click',
        'click_flag',
    )
}



# Absolute XPath of the container that holds the item list (<ul>) and the
# button that is clicked before the second pass.
_LIST_XPATH = '//*[@id="content"]/div[3]/div[2]/div[4]/div'
# XPath prefix shared by the title / schedule / tag elements that are read
# after clicking the TODAY item.
_DETAIL_XPATH = '//*[@id="content"]/div/div[2]/div[2]'


def _text_or_null(xpath):
    """Return the text of the element at *xpath*, or 'null' if it cannot be read."""
    try:
        return driver.find_element_by_xpath(xpath).text
    except Exception:
        # Best effort: a missing detail field is recorded as 'null'.
        return 'null'


def _format_event_date(label, today, year):
    """Convert a list-item date label into a date string.

    The part of *label* before the first space is used. 'TODAY' maps to
    *today*; anything else has '/' replaced by '-' and is prefixed with
    *year* and a dash.
    """
    token = label.split(' ')[0]
    if token == 'TODAY':
        # Compare the raw label instead of a year-specific '2021-TODAY'
        # string so the check keeps working in every year.
        return today
    return str(year) + '-' + token.replace('/', '-')


def _crawl_item(index):
    """Scrape list item number *index* (1-based) and return it as a row dict.

    The returned dict has exactly one value for every key of shop_dict.
    If any required element cannot be read, the row keeps its failure
    defaults: 'null' fields and 'F' flags.
    """
    date_xpath = '%s/ul/li[%s]/a/strong' % (_LIST_XPATH, index)
    brand_xpath = '%s/ul/li[%s]/a/div/strong' % (_LIST_XPATH, index)
    row = {
        'crawled_date': nowDate,  # known regardless of scraping success
        'event_date': 'null',
        'brand': 'null',
        'crawl_flag': 'F',
        'title_click': 'null',
        'schedule_click': 'null',
        'tag_click': 'null',
        'click_flag': 'F',
    }
    try:
        date_label = driver.find_element_by_xpath(date_xpath).text
        if date_label == 'TODAY':
            # Only the TODAY item is opened to read its detail fields.
            driver.find_element_by_xpath(date_xpath).click()
            row['title_click'] = _text_or_null(_DETAIL_XPATH + '/h3')
            row['schedule_click'] = _text_or_null(_DETAIL_XPATH + '/div[1]/em')
            row['tag_click'] = _text_or_null(_DETAIL_XPATH + '/div[2]')
            # Return to the list page before reading the brand name.
            driver.back()
        brand = driver.find_element_by_xpath(brand_xpath).text
        event_date = _format_event_date(date_label, nowDate, nowYear)
    except Exception:
        # The failure is recorded through the 'F' flags already in the row,
        # so earlier rows stay intact and later items can still be scraped.
        return row
    row.update(event_date=event_date, brand=brand, crawl_flag='S', click_flag='S')
    return row


def _crawl_visible_items():
    """Append one row to shop_dict for each item counted in the list text."""
    listing = driver.find_element_by_xpath(_LIST_XPATH).find_elements_by_tag_name('ul')[0]
    # Item count is derived from the list's text: number of text lines, halved.
    # NOTE(review): presumably two text lines (date, brand) per item -- confirm.
    item_count = int((listing.text.count('\n') + 1) / 2)
    for index in range(1, item_count + 1):
        row = _crawl_item(index)
        # Append to every column so all lists stay the same length.
        for column, value in row.items():
            shop_dict[column].append(value)


# First pass over the list as initially displayed.
_crawl_visible_items()

# find_elements_* returns an empty list when nothing matches, so a missing
# button skips the second pass instead of raising.
_list_buttons = driver.find_elements_by_xpath(_LIST_XPATH + '/button')
if _list_buttons:
    _list_buttons[0].click()
    # Second pass over the list after the button click.
    _crawl_visible_items()
    




# Convert the column -> values mapping into a DataFrame; the dict keys become
# the column names.
shop_dict = pd.DataFrame(data=shop_dict)
# Bare expression kept for notebook-style display; it does nothing in a script.
shop_dict



import pymssql

# Connection settings for the SQL Server instance.
# NOTE(review): these look like placeholder values -- replace before running.
server = '서버'
database = '데이터베이스명'
username = '아이디'
password = '패스워드'

# Build the INSERT statement from the frame's columns: upper-cased column
# names and one '%s' placeholder per column, so the values themselves are
# always passed as parameters.
col_nm = ', '.join(shop_dict.columns.values.tolist()).upper()
val_ct = ', '.join('%s' for _ in shop_dict.columns)

sql = 'insert into NVR_SHP_BRNDAY ('+col_nm+') values ('+val_ct+');'

# One tuple of values per DataFrame row.
rows = tuple(tuple(record) for record in shop_dict.to_numpy())

cnxn = pymssql.connect(server, username, password, database)
try:
    cursor = cnxn.cursor()
    cursor.executemany(sql, rows)
    cnxn.commit()
finally:
    # Always release the connection, even when the insert or commit fails.
    cnxn.close()


# Pause for three seconds, then end the browser session.
sleep(3)
driver.quit()