본문 바로가기

6.Numpy | Pandas | Crawling

6/2(금) IT K-DT(64일차) / 3. 인스타그램 데이터 크롤링

 

 

3. 인스타그램 데이터 크롤링

 

3-1. 로그인

 

import chromedriver_autoinstaller
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
# Start a Chrome browser session that the rest of the script drives.
driver = webdriver.Chrome()
# Next step: enter the login ID and password, then log in.

 

 

 

# Wait up to 3 seconds whenever an element is looked up: as soon as the
# element appears within that window the script continues immediately, and
# if it still has not appeared after 3 seconds a NoSuchElement exception is
# raised.
driver.implicitly_wait(3)
url = 'https://www.instagram.com/'
driver.get(url)

# NOTE(review): credentials are hard-coded in the source; consider reading
# them from the environment or a prompt instead of committing them.
id = 'junehee042@gmail.com'
pw = 'ckato******'  # trailing characters are masked with *

# Absolute XPaths of the ID and password inputs on the login form.
input_id = driver.find_element(
    By.XPATH,
    '/html/body/div[2]/div/div/div[1]/div/div/div/div[1]/section/main/article'
    '/div[2]/div[1]/div[2]/form/div/div[1]/div/label/input',
)

input_pw = driver.find_element(
    By.XPATH,
    '/html/body/div[2]/div/div/div[1]/div/div/div/div[1]/section/main/article'
    '/div[2]/div[1]/div[2]/form/div/div[2]/div/label/input',
)

input_id.send_keys(id)
input_pw.send_keys(pw)

# Click the third child of the login form to submit it.
driver.find_element(
    By.XPATH,
    '/html/body/div[2]/div/div/div[1]/div/div/div/div[1]/section/main/article'
    '/div[2]/div[1]/div[2]/form/div/div[3]',
).click()
time.sleep(5)  # pause for about 5 seconds after submitting

 

3-2. 해시태그 검색

 

# Open the Instagram explore page for a single hashtag.
hashtag = '먹스타그램'
url = f'https://www.instagram.com/explore/tags/{hashtag}/'
driver.get(url)
# Fixed 5-second pause after navigating before the next step runs.
time.sleep(5)

 

 

3-3. 스크롤 내리기

 

# Scroll to the bottom of the page five times, pausing 5 seconds after each
# scroll.
for _ in range(5): # scroll down only 5 times
    driver.execute_script('window.scrollTo(0,document.body.scrollHeight)')
    time.sleep(5)

 

3-4. 원하는 사진 클릭하기

 

# Absolute XPath of the thumbnail to open (the div[2]/div[2] entry under the
# article element).
xpath = (
    '/html/body/div[2]/div/div/div[2]/div/div/div/div[1]/div[1]/div[2]/section/main'
    '/article/div/div/div/div[2]/div[2]/a/div[1]'
)
driver.find_element(By.XPATH, xpath).click()
time.sleep(3)

 

 

3-5. 좋아요 클릭하기

 

# Absolute XPath of the like button in the opened post.
like_xpath = (
    '/html/body/div[2]/div/div/div[2]/div/div/div/div[1]/div[1]/div[2]/section/main'
    '/div/div[1]/div/div[2]/div/div[3]/div[1]/div[1]/span[1]/button'
)

driver.find_element(By.XPATH, like_xpath).click()
time.sleep(1)

 

3-6. 댓글달기

 

comment = '잘보고갑니다.'
# Absolute XPath of the comment textarea in the opened post.
comment_xpath = (
    '/html/body/div[2]/div/div/div[2]/div/div/div/div[1]/div[1]/div[2]/section/main'
    '/div/div[1]/div/div[2]/div/section/div/form/div/textarea'
)
# Click the comment box once before typing, since clicking into it and then
# writing is the natural order of interaction.
driver.find_element(By.XPATH, comment_xpath).click()

# The element is looked up again rather than reusing the clicked reference.
driver.find_element(By.XPATH, comment_xpath).send_keys(comment)
time.sleep(3)

 

3-7. 전송버튼 클릭하기

 

# Absolute XPath of the element that submits the comment form.
send_xpath = (
    '/html/body/div[2]/div/div/div[2]/div/div/div/div[1]/div[1]/div[2]/section/main'
    '/div/div[1]/div/div[2]/div/section/div/form/div/div[2]/div'
)

driver.find_element(By.XPATH, send_xpath).click()
time.sleep(3)

 

3-8. 다음버튼 클릭하기

 

# Absolute XPath of the button that advances to the next post.
next_xpath = (
    '/html/body/div[2]/div/div/div[2]/div/div/div/div[1]/div[1]/div[2]/section/main'
    '/div/div[1]/div/div[1]/div/div/div/div/div/div[1]/div[2]/div/button'
)

driver.find_element(By.XPATH, next_xpath).click()
time.sleep(3)

 

3-9. 위의 내용들을 함수로 리팩토링

 

def login(id, pw):
    """Fill in the Instagram login form and submit it.

    Uses the module-level ``driver``, which must already be showing the
    login page.

    Args:
        id: Account identifier typed into the first form input.
        pw: Password typed into the second form input.
    """
    # Every login-form element shares this absolute XPath prefix.
    form_xpath = (
        '/html/body/div[2]/div/div/div[1]/div/div/div/div[1]/section/main/article'
        '/div[2]/div[1]/div[2]/form/div'
    )
    input_id = driver.find_element(By.XPATH, form_xpath + '/div[1]/div/label/input')
    input_pw = driver.find_element(By.XPATH, form_xpath + '/div[2]/div/label/input')
    input_id.send_keys(id)
    input_pw.send_keys(pw)
    # The third child of the form is clicked to submit the credentials.
    driver.find_element(By.XPATH, form_xpath + '/div[3]').click()
    time.sleep(3)
    
def search(hashtag, scroll_times):
    """Open the explore page for ``hashtag`` and scroll it ``scroll_times`` times.

    Uses the module-level ``driver``. A fixed 3-second pause follows the
    navigation and each scroll.

    Args:
        hashtag: Tag text inserted into the explore URL.
        scroll_times: Number of scroll-to-bottom steps to perform.
    """
    tag_page = f'https://www.instagram.com/explore/tags/{hashtag}/'
    scroll_to_bottom = 'window.scrollTo(0,document.body.scrollHeight)'

    driver.get(tag_page)
    time.sleep(3)

    for _step in range(scroll_times):
        driver.execute_script(scroll_to_bottom)
        time.sleep(3)
    
def like_and_comment(nth, commnet, repeat=1):
    """Open the nth thumbnail, then like and comment on consecutive posts.

    Uses the module-level ``driver``, which must already be showing a
    hashtag result page.

    Args:
        nth: 1-based position of the thumbnail to open, counted three per row.
        commnet: Comment text to post. The misspelled parameter name is kept
            so existing keyword callers keep working.
        repeat: Number of consecutive posts to like and comment on.
    """
    comment = commnet  # use the argument, not a same-named global
    row = (nth - 1) // 3 + 1
    col = (nth - 1) % 3 + 1

    # Shared absolute XPath prefix of the result grid and the opened post.
    main_xpath = (
        '/html/body/div[2]/div/div/div[2]/div/div/div/div[1]/div[1]/div[2]/section/main'
    )

    # Click the requested photo.
    # NOTE(review): the original hard-coded div[2]/div[2] here and left
    # row/col unused; this assumes those two steps are the grid row and
    # column -- confirm against the live page.
    xpath = main_xpath + f'/article/div/div/div/div[{row}]/div[{col}]/a/div[1]'
    driver.find_element(By.XPATH, xpath).click()
    time.sleep(3)

    like_xpath = (
        main_xpath + '/div/div[1]/div/div[2]/div/div[3]/div[1]/div[1]/span[1]/button'
    )
    comment_xpath = main_xpath + '/div/div[1]/div/div[2]/div/section/div/form/div/textarea'
    send_xpath = main_xpath + '/div/div[1]/div/div[2]/div/section/div/form/div/div[2]/div'
    next_xpath = (
        '/html/body/div[2]/div/div/div[3]/div/div/div[1]/div/div[3]/div/div/div/div/div[1]'
        '/div/div/div[2]/button'
    )

    for i in range(repeat):
        # Click the like button.
        driver.find_element(By.XPATH, like_xpath).click()
        time.sleep(1)
        # Write the comment: click the box first, then type into it.
        driver.find_element(By.XPATH, comment_xpath).click()
        driver.find_element(By.XPATH, comment_xpath).send_keys(comment)
        time.sleep(3)
        # Click the send button.
        driver.find_element(By.XPATH, send_xpath).click()
        time.sleep(3)
        # Move to the next post unless this was the last repetition.
        if i + 1 < repeat:
            driver.find_element(By.XPATH, next_xpath).click()
            time.sleep(3)
# End-to-end run using the helper functions: start a browser, log in, open a
# hashtag page, then like and comment on posts.
driver = webdriver.Chrome()
# Element lookups wait up to 3 seconds before failing.
driver.implicitly_wait(3)

url = 'https://www.instagram.com/'
driver.get(url)

# NOTE(review): credentials are hard-coded in the source; `id` also shadows
# the Python builtin of the same name.
id = 'junehee042@gmail.com'
pw = 'ckato******'

login(id, pw)
time.sleep(5)

hashtag = '사과'

# Open the hashtag page and scroll it 3 times.
search(hashtag,3)
time.sleep(5)

# Positional arguments: nth=7, the comment text, repeat=3.
like_and_comment(7, '잘 보고 갑니다', 3)