파이썬으로 인기 검색어 추출하기 완벽 가이드

블로그 글, 뉴스 기사, 상품 설명에서 핵심 키워드를 자동으로 추출하고 싶으신가요? SEO 최적화나 데이터 분석을 위해 중요한 단어들을 찾아야 하나요? 파이썬을 사용하면 텍스트에서 자동으로 키워드를 추출할 수 있습니다. 오늘은 네이버, 구글 검색 키워드부터 텍스트 분석 키워드까지 모든 방법을 알려드립니다.

Table of Contents

키워드 추출이란?

키워드 추출은 텍스트에서 중요한 단어나 구문을 자동으로 찾아내는 기술입니다. 주로 다음과 같은 목적으로 사용됩니다:

SEO 최적화: 블로그 글의 핵심 키워드 찾기
텍스트 요약: 긴 문서의 주제 파악
데이터 분석: 트렌드 분석, 감성 분석
검색 엔진: 관련 검색어 추천
콘텐츠 분류: 자동 태그 생성

1. 네이버 연관 검색어 가져오기

네이버 검색 시 나타나는 연관 검색어를 수집하는 방법입니다.

네이버 검색 API 사용

import requests
import json

def get_naver_related_keywords(keyword, client_id, client_secret):
    """Collect candidate keywords from Naver shopping-search result titles.

    Queries the Naver Open API shopping search endpoint and returns the
    distinct product titles of the first 10 hits with the ``<b>`` highlight
    tags removed.

    Args:
        keyword: Search query.
        client_id: Value sent as the ``X-Naver-Client-Id`` header.
        client_secret: Value sent as the ``X-Naver-Client-Secret`` header.

    Returns:
        A list of unique titles in response order, or ``[]`` if the request
        or the parsing fails (the error is printed).
    """
    url = "https://openapi.naver.com/v1/search/shop.json"

    headers = {
        "X-Naver-Client-Id": client_id,
        "X-Naver-Client-Secret": client_secret
    }

    params = {
        "query": keyword,
        "display": 10
    }

    try:
        # A timeout keeps the call from hanging forever on a stalled server.
        response = requests.get(url, headers=headers, params=params, timeout=10)
        response.raise_for_status()

        data = response.json()

        # Strip the highlight markup the API wraps around matched terms.
        titles = (
            item['title'].replace('<b>', '').replace('</b>', '')
            for item in data.get('items', [])
        )

        # dict.fromkeys de-duplicates while keeping the response order,
        # so repeated runs give the same list.
        return list(dict.fromkeys(titles))

    except Exception as e:
        print(f"오류 발생: {e}")
        return []

# Usage example (requires Naver Open API credentials)
client_id = "your_client_id"
client_secret = "your_client_secret"
keywords = get_naver_related_keywords("파이썬", client_id, client_secret)
print(keywords)

네이버 자동완성 키워드 수집

import requests
from urllib.parse import quote

def get_naver_autocomplete(keyword):
    """Fetch Naver search-box autocomplete suggestions for a keyword.

    Args:
        keyword: Partial or full search term.

    Returns:
        A list of suggestion strings taken from the first group of the
        response's ``items`` field, or ``[]`` on any failure (the error is
        printed).
    """
    # Imported locally: this snippet's own import list does not include json.
    import json

    encoded_keyword = quote(keyword)
    url = f"https://ac.search.naver.com/nx/ac?q={encoded_keyword}&con=0&frm=nv&ans=2&r_format=json&r_enc=UTF-8&r_unicode=0&t_koreng=1&run=2&rev=4&q_enc=UTF-8&st=100&_callback=_jsonp_0"

    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        text = response.text

        # The `_callback` parameter makes the endpoint answer with JSONP;
        # keep only the JSON between the outermost parentheses.
        json_text = text[text.find('(') + 1:text.rfind(')')]
        data = json.loads(json_text)

        groups = data.get('items')
        if not groups:
            return []

        # Each entry of the first group is a list whose first element is
        # the suggestion text.
        return [item[0] for item in groups[0]]

    except Exception as e:
        print(f"오류 발생: {e}")
        return []

# Usage example
keywords = get_naver_autocomplete("파이썬")
print("네이버 자동완성 키워드:", keywords)

2. 구글 검색 키워드 가져오기

구글의 연관 검색어와 자동완성을 수집하는 방법입니다.

Google Suggest API 활용

import requests
import xml.etree.ElementTree as ET

def get_google_suggest(keyword):
    """Fetch Google autocomplete suggestions for a keyword (Korean locale).

    Requests the suggest endpoint in ``toolbar`` (XML) output mode and reads
    the ``data`` attribute of every ``<suggestion>`` element.

    Args:
        keyword: Search term to complete.

    Returns:
        A list of suggestion strings, or ``[]`` on any failure (the error is
        printed).
    """
    url = "http://suggestqueries.google.com/complete/search"

    params = {
        "output": "toolbar",
        "q": keyword,
        "hl": "ko"  # Korean
    }

    try:
        response = requests.get(url, params=params, timeout=10)
        # Fail early on HTTP errors instead of feeding an error page to the
        # XML parser.
        response.raise_for_status()
        root = ET.fromstring(response.content)

        values = (node.get('data') for node in root.findall('.//suggestion'))
        return [value for value in values if value]

    except Exception as e:
        print(f"오류 발생: {e}")
        return []

# Usage example
keywords = get_google_suggest("파이썬")
print("구글 자동완성 키워드:", keywords)

여러 언어의 키워드 가져오기

def get_google_suggest_multilang(keyword, languages=('ko', 'en', 'ja')):
    """Fetch Google autocomplete suggestions for a keyword in several locales.

    Args:
        keyword: Search term to complete.
        languages: Iterable of ``hl`` language codes to query. The default
            is an immutable tuple so it cannot be altered between calls.

    Returns:
        A dict mapping each language code to its list of suggestions. A
        language whose request or parsing failed is left out of the dict
        (its error is printed).
    """
    url = "http://suggestqueries.google.com/complete/search"
    all_keywords = {}

    for lang in languages:
        params = {
            "output": "toolbar",
            "q": keyword,
            "hl": lang
        }

        try:
            response = requests.get(url, params=params, timeout=10)
            response.raise_for_status()
            root = ET.fromstring(response.content)

            values = (node.get('data') for node in root.findall('.//suggestion'))
            all_keywords[lang] = [value for value in values if value]

        except Exception as e:
            print(f"{lang} 오류: {e}")

    return all_keywords

# Usage example
keywords = get_google_suggest_multilang("python")
for lang, words in keywords.items():
    # "\n" (not an escaped "\\n") so a real blank line separates languages
    print(f"\n{lang.upper()} 키워드:")
    for word in words:
        print(f"  - {word}")

3. 텍스트에서 키워드 자동 추출하기

문서나 글에서 중요한 키워드를 자동으로 추출하는 방법입니다.

필수 라이브러리 설치

# Korean natural language processing
pip install konlpy

# Keyword extraction libraries
pip install keybert
pip install yake

# Also imported by the examples in this guide
pip install requests beautifulsoup4 scikit-learn pandas openpyxl textblob

# Korean morphological analyzer (optional)
# Windows: pip install python-mecab-ko
# Mac/Linux: separate installation required

방법 1: TF-IDF로 키워드 추출

TF-IDF는 가장 기본적이고 효과적인 키워드 추출 방법입니다.

from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np

def extract_keywords_tfidf(text, top_n=10):
    """Rank the terms of a single text by their TF-IDF weight.

    Args:
        text: Document to analyse.
        top_n: Maximum number of keywords to return.

    Returns:
        A list of ``{'keyword': term, 'score': weight}`` dicts ordered from
        highest to lowest weight; terms with a zero weight are dropped.
    """
    tfidf = TfidfVectorizer(
        stop_words=None,
        ngram_range=(1, 2),  # single words and two-word phrases
        max_features=100
    )

    # The text is the only document, so row 0 holds all of its weights.
    weights = tfidf.fit_transform([text]).toarray()[0]
    terms = tfidf.get_feature_names_out()

    # Indices of the top_n largest weights, largest first.
    ranked = np.argsort(weights)[::-1][:top_n]

    return [
        {'keyword': terms[position], 'score': float(weights[position])}
        for position in ranked
        if weights[position] > 0
    ]

# Usage example
text = """
파이썬은 배우기 쉬운 프로그래밍 언어입니다.
파이썬을 사용하면 웹 개발, 데이터 분석, 인공지능 등
다양한 분야에서 활용할 수 있습니다.
특히 파이썬은 문법이 간단하여 초보자도 쉽게 배울 수 있습니다.
"""

keywords = extract_keywords_tfidf(text, top_n=5)
print("추출된 키워드:")
for kw in keywords:
    print(f"  {kw['keyword']}: {kw['score']:.4f}")

방법 2: KoNLPy로 한국어 키워드 추출

한국어 텍스트에서 명사를 추출하여 키워드를 찾습니다.

from konlpy.tag import Okt
from collections import Counter

def extract_keywords_korean(text, top_n=10):
    """Return the most frequent nouns of a Korean text.

    Args:
        text: Korean text to analyse.
        top_n: Maximum number of keywords to return.

    Returns:
        A list of ``{'keyword': noun, 'count': occurrences}`` dicts, most
        frequent first. Single-character nouns are ignored.
    """
    tagger = Okt()

    # Count only nouns longer than one character.
    frequency = Counter(
        token for token in tagger.nouns(text) if len(token) > 1
    )

    return [
        {'keyword': noun, 'count': occurrences}
        for noun, occurrences in frequency.most_common(top_n)
    ]

# Usage example
text = """
인공지능 기술이 빠르게 발전하고 있습니다.
특히 딥러닝과 머신러닝 분야에서 큰 진전이 있었습니다.
자연어 처리 기술도 GPT와 같은 대규모 언어 모델 덕분에
크게 발전했습니다. 이러한 인공지능 기술은 다양한 산업 분야에
적용되고 있으며, 앞으로도 계속 발전할 것으로 예상됩니다.
"""

keywords = extract_keywords_korean(text, top_n=5)
# "\n" (not an escaped "\\n") so the heading starts after a blank line
print("\n한국어 키워드:")
for kw in keywords:
    print(f"  {kw['keyword']}: {kw['count']}회")

방법 3: KeyBERT로 고급 키워드 추출

BERT 모델을 사용한 최신 키워드 추출 방법입니다.

from keybert import KeyBERT

def extract_keywords_bert(text, top_n=5):
    """Extract keyphrases from a text with KeyBERT.

    Args:
        text: Document to analyse.
        top_n: Number of keyphrases to request.

    Returns:
        A list of ``{'keyword': phrase, 'score': value}`` dicts in the order
        KeyBERT returns them.
    """
    options = {
        'keyphrase_ngram_range': (1, 2),  # one- and two-word phrases
        'stop_words': None,
        'top_n': top_n,
        'use_maxsum': True,  # favour more diverse phrases
        'nr_candidates': 20,
    }

    # A fresh model with the library's default settings on every call.
    ranked = KeyBERT().extract_keywords(text, **options)

    return [{'keyword': pair[0], 'score': pair[1]} for pair in ranked]

# Usage example
text = """
Machine learning is a subset of artificial intelligence that
focuses on building systems that can learn from data.
Deep learning, a type of machine learning, uses neural networks
to model complex patterns in data.
"""

keywords = extract_keywords_bert(text, top_n=5)
# "\n" (not an escaped "\\n") so the heading starts after a blank line
print("\nBERT 키워드:")
for kw in keywords:
    print(f"  {kw['keyword']}: {kw['score']:.4f}")

방법 4: YAKE로 비지도 키워드 추출

학습 데이터 없이 키워드를 추출하는 YAKE 알고리즘입니다.

import yake

def extract_keywords_yake(text, language='ko', top_n=10):
    """Extract keywords from a text with the unsupervised YAKE algorithm.

    Args:
        text: Document to analyse.
        language: Language code handed to YAKE as ``lan``.
        top_n: Number of keywords to request.

    Returns:
        A list of ``{'keyword': phrase, 'score': value}`` dicts in the order
        YAKE returns them.
        NOTE(review): YAKE scores are believed to be lower-is-better;
        confirm before sorting or thresholding on them.
    """
    settings = {
        'lan': language,
        'n': 2,           # longest phrase, in words
        'dedupLim': 0.9,  # de-duplication threshold
        'top': top_n,
        'features': None,
    }
    extractor = yake.KeywordExtractor(**settings)

    ranked = extractor.extract_keywords(text)

    return [{'keyword': pair[0], 'score': pair[1]} for pair in ranked]

# Usage example
text = """
클라우드 컴퓨팅은 인터넷을 통해 컴퓨팅 서비스를 제공하는 기술입니다.
AWS, Azure, GCP와 같은 클라우드 플랫폼을 통해
서버, 스토리지, 데이터베이스 등을 사용할 수 있습니다.
"""

keywords = extract_keywords_yake(text, top_n=5)
# "\n" (not an escaped "\\n") so the heading starts after a blank line
print("\nYAKE 키워드:")
for kw in keywords:
    print(f"  {kw['keyword']}: {kw['score']:.4f}")

4. URL/웹페이지에서 키워드 추출하기

웹사이트의 메타 태그나 본문에서 키워드를 추출합니다.

import requests
from bs4 import BeautifulSoup
from konlpy.tag import Okt
from collections import Counter

def extract_keywords_from_url(url, top_n=10):
    """Extract keywords from a web page's meta tag and visible text.

    Args:
        url: Address of the page to download.
        top_n: Maximum number of body keywords to return.

    Returns:
        A dict with two keys, or ``None`` if downloading or parsing fails
        (the error is printed):

        * ``'meta_keywords'``: non-empty entries of the page's
          ``<meta name="keywords">`` tag (``[]`` when the tag is absent).
        * ``'extracted_keywords'``: ``{'keyword', 'count'}`` dicts for the
          most frequent nouns longer than one character.
    """
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()

        soup = BeautifulSoup(response.content, 'html.parser')

        # 1. Meta keywords. Blank entries (empty content, trailing commas)
        #    are dropped so callers never receive [''].
        meta_keywords = []
        meta_tag = soup.find('meta', attrs={'name': 'keywords'})
        if meta_tag:
            raw_entries = meta_tag.get('content', '').split(',')
            meta_keywords = [kw.strip() for kw in raw_entries if kw.strip()]

        # 2. Visible text: remove script/style elements first so their
        #    source code is not counted as words.
        for tag in soup(['script', 'style']):
            tag.decompose()

        text = soup.get_text()

        # 3. Noun frequencies, ignoring single-character nouns.
        okt = Okt()
        nouns = [noun for noun in okt.nouns(text) if len(noun) > 1]
        extracted_keywords = Counter(nouns).most_common(top_n)

        return {
            'meta_keywords': meta_keywords,
            'extracted_keywords': [
                {'keyword': word, 'count': count}
                for word, count in extracted_keywords
            ]
        }

    except Exception as e:
        print(f"오류 발생: {e}")
        return None

# Usage example
# Plain URL: the surrounding "<...>" was markup residue that requests rejects.
url = "https://example.com"
result = extract_keywords_from_url(url, top_n=10)

if result:
    print("메타 키워드:", result['meta_keywords'])
    # "\n" (not an escaped "\\n") so the heading starts after a blank line
    print("\n추출된 키워드:")
    for kw in result['extracted_keywords']:
        print(f"  {kw['keyword']}: {kw['count']}회")

5. 여러 문서에서 공통 키워드 찾기

여러 문서를 비교하여 공통 키워드를 추출합니다.

from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np

def extract_common_keywords(documents, top_n=10):
    """Rank terms by their average TF-IDF weight across several documents.

    Args:
        documents: List of text documents.
        top_n: Maximum number of keywords to return.

    Returns:
        A list of ``{'keyword': term, 'avg_score': mean_weight}`` dicts,
        highest average first.
    """
    model = TfidfVectorizer(
        ngram_range=(1, 2),
        max_features=100
    )

    dense = model.fit_transform(documents).toarray()
    vocabulary = model.get_feature_names_out()

    # Column-wise mean: one average weight per term over all documents.
    mean_weight = np.mean(dense, axis=0)

    order = np.argsort(mean_weight)[::-1][:top_n]

    return [
        {'keyword': vocabulary[position], 'avg_score': float(mean_weight[position])}
        for position in order
    ]

# Usage example
documents = [
    "파이썬은 인공지능 개발에 많이 사용됩니다.",
    "인공지능 기술이 빠르게 발전하고 있습니다.",
    "머신러닝과 딥러닝은 인공지능의 핵심 기술입니다."
]

keywords = extract_common_keywords(documents, top_n=5)
print("공통 키워드:")
for kw in keywords:
    print(f"  {kw['keyword']}: {kw['avg_score']:.4f}")

6. 실시간 트렌드 키워드 수집

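네이버 메인 페이지에서 실시간 검색어를 수집하는 예제입니다. 단, 네이버 실시간 검색어 서비스는 2021년에 종료되었으므로 아래 코드는 HTML 파싱 방식의 예시로만 참고하고, 실제 사용 시에는 대상 페이지 구조에 맞게 선택자를 수정해야 합니다.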

import requests
from bs4 import BeautifulSoup
from datetime import datetime

def get_naver_trending_keywords():
    """Scrape the real-time trending keyword list from the Naver home page.

    NOTE(review): Naver is reported to have retired its real-time search
    ranking in 2021, so the selector below probably matches nothing and the
    function returns ``[]``. Confirm, and adapt the selector if needed.

    Returns:
        A list of up to 20 ``{'rank', 'keyword', 'timestamp'}`` dicts in
        page order, or ``[]`` when nothing matches or the request fails
        (the error is printed).
    """
    url = "https://www.naver.com"

    try:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        }

        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        # The selector depends on the page's HTML structure and must be
        # updated whenever that structure changes.
        keyword_elements = soup.select('.PM_CL_realtimeKeyword_rolling .ah_item')

        # One timestamp for the whole snapshot so every row agrees.
        collected_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

        return [
            {
                'rank': rank,
                'keyword': elem.get_text(strip=True),
                'timestamp': collected_at
            }
            for rank, elem in enumerate(keyword_elements[:20], 1)
        ]

    except Exception as e:
        print(f"오류 발생: {e}")
        return []

# Usage example
trending = get_naver_trending_keywords()
print("네이버 실시간 검색어:")
for item in trending[:10]:
    print(f"  {item['rank']}. {item['keyword']}")

7. 키워드 클러스터링 (유사 키워드 그룹화)

비슷한 키워드끼리 묶어주는 기능입니다.

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans
import numpy as np

def cluster_keywords(keywords, n_clusters=3):
    """Group keywords into clusters with TF-IDF vectors and K-means.

    Args:
        keywords: List of keyword strings.
        n_clusters: Number of clusters to form.

    Returns:
        A dict with one ``'그룹 <k>'`` key per cluster (k starting at 1),
        each mapping to the list of keywords assigned to that cluster.
    """
    features = TfidfVectorizer().fit_transform(keywords)

    # Fixed random_state so repeated runs produce the same assignment.
    labels = KMeans(n_clusters=n_clusters, random_state=42).fit_predict(features)

    # Create every group up front so empty clusters still appear.
    groups = {f'그룹 {number}': [] for number in range(1, n_clusters + 1)}

    for phrase, label in zip(keywords, labels):
        groups[f'그룹 {label+1}'].append(phrase)

    return groups

# Usage example
keywords = [
    "파이썬 프로그래밍",
    "파이썬 코딩",
    "머신러닝 알고리즘",
    "딥러닝 모델",
    "인공지능 개발",
    "프로그래밍 언어",
    "데이터 분석",
    "빅데이터 처리"
]

clusters = cluster_keywords(keywords, n_clusters=3)
# "\n" (not an escaped "\\n") so headings are separated by blank lines
print("\n키워드 클러스터:")
for group, words in clusters.items():
    print(f"\n{group}:")
    for word in words:
        print(f"  - {word}")

8. 완전한 키워드 분석 시스템

모든 기능을 통합한 실전 프로젝트입니다.

import requests
from bs4 import BeautifulSoup
from konlpy.tag import Okt
from collections import Counter
import pandas as pd
from datetime import datetime

class KeywordAnalyzer:
    """Collect keywords from text, web pages and Naver autocomplete, and
    export the aggregated counts to Excel."""

    def __init__(self):
        self.okt = Okt()
        # Not read by any method of this class; kept for callers that use it.
        self.keywords = []

    def extract_from_text(self, text, top_n=10):
        """Return the ``top_n`` most frequent nouns as ``(noun, count)`` pairs.

        Single-character nouns are ignored.
        """
        nouns = [noun for noun in self.okt.nouns(text) if len(noun) > 1]
        return Counter(nouns).most_common(top_n)

    def extract_from_url(self, url):
        """Download a page and return its top nouns as ``(noun, count)`` pairs.

        Returns ``[]`` if the download or parsing fails (the error is
        printed).
        """
        try:
            response = requests.get(url, timeout=10)
            # Do not analyse the body of an HTTP error page.
            response.raise_for_status()
            soup = BeautifulSoup(response.content, 'html.parser')

            # Drop script/style elements so their source is not counted.
            for tag in soup(['script', 'style']):
                tag.decompose()

            text = soup.get_text()
            return self.extract_from_text(text)

        except Exception as e:
            print(f"URL 처리 실패: {e}")
            return []

    def get_naver_autocomplete(self, keyword):
        """Return Naver autocomplete suggestions for ``keyword``.

        Returns ``[]`` on any failure (the error is printed).
        """
        # Imported locally: this snippet's import list does not include json.
        import json
        from urllib.parse import quote

        encoded = quote(keyword)
        url = f"https://ac.search.naver.com/nx/ac?q={encoded}&con=0&frm=nv&ans=2&r_format=json"

        try:
            response = requests.get(url, timeout=10)
            response.raise_for_status()
            text = response.text

            # No callback is requested, so the body may be plain JSON; fall
            # back to unwrapping `callback(...)` only when that fails.
            try:
                data = json.loads(text)
            except ValueError:
                data = json.loads(text[text.find('(') + 1:text.rfind(')')])

            groups = data.get('items')
            if not groups:
                return []

            return [item[0] for item in groups[0]]
        except Exception as e:
            # Report the failure instead of silently returning nothing.
            print(f"네이버 자동완성 실패: {e}")
            return []

    def analyze_multiple_sources(self, sources):
        """Merge keyword counts from several sources.

        Args:
            sources: List of dicts such as
                ``{'type': 'text', 'content': '...'}`` or
                ``{'type': 'url', 'content': 'http://...'}``.
                Entries with any other ``type`` are skipped.

        Returns:
            The 20 most frequent ``(keyword, total_count)`` pairs.
        """
        all_keywords = Counter()

        for source in sources:
            if source['type'] == 'text':
                keywords = self.extract_from_text(source['content'])
            elif source['type'] == 'url':
                keywords = self.extract_from_url(source['content'])
            else:
                continue

            for word, count in keywords:
                all_keywords[word] += count

        return all_keywords.most_common(20)

    def save_to_excel(self, keywords, filename):
        """Write ``(keyword, count)`` pairs plus a collection time to Excel."""
        df = pd.DataFrame(keywords, columns=['키워드', '빈도수'])
        df['수집시간'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

        df.to_excel(filename, index=False, engine='openpyxl')
        print(f"✅ {filename}에 저장 완료!")

    def generate_report(self, keywords, filename='keyword_report.xlsx'):
        """Write a three-sheet Excel report for ``(keyword, count)`` pairs.

        Sheets: all keywords, the first 10 rows, and summary statistics.
        """
        df = pd.DataFrame(keywords, columns=['키워드', '빈도수'])

        with pd.ExcelWriter(filename, engine='openpyxl') as writer:
            # All keywords
            df.to_excel(writer, sheet_name='전체키워드', index=False)

            # First 10 rows (the input order is assumed to be by frequency)
            df.head(10).to_excel(writer, sheet_name='TOP10', index=False)

            # Summary statistics
            stats = pd.DataFrame({
                '항목': ['총 키워드 수', '평균 빈도', '최고 빈도', '최저 빈도'],
                '값': [
                    len(df),
                    df['빈도수'].mean(),
                    df['빈도수'].max(),
                    df['빈도수'].min()
                ]
            })
            stats.to_excel(writer, sheet_name='통계', index=False)

        print(f"✅ 리포트 생성 완료: {filename}")

# Usage example
analyzer = KeywordAnalyzer()

# 1. Extract keywords from text
text = """
인공지능 기술이 빠르게 발전하면서 다양한 산업 분야에 적용되고 있습니다.
특히 자연어 처리와 컴퓨터 비전 분야에서 큰 진전이 있었습니다.
"""
keywords = analyzer.extract_from_text(text)
print("텍스트 키워드:", keywords)

# 2. Naver autocomplete
naver_keywords = analyzer.get_naver_autocomplete("파이썬")
# "\n" (not an escaped "\\n") so the line starts after a blank line
print("\n네이버 자동완성:", naver_keywords)

# 3. Combined analysis of several sources
sources = [
    {'type': 'text', 'content': text},
    # {'type': 'url', 'content': 'https://example.com'}
]
all_keywords = analyzer.analyze_multiple_sources(sources)

# 4. Generate the report
analyzer.generate_report(all_keywords, 'keyword_analysis.xlsx')

9. 키워드 난이도 분석

SEO를 위한 키워드 경쟁도를 분석합니다.

import requests
from urllib.parse import quote

def analyze_keyword_difficulty(keyword):
    """Estimate keyword competition from Google's reported result count.

    Args:
        keyword: Search term to evaluate.

    Returns:
        ``{'keyword', 'result_count', 'difficulty'}`` where ``difficulty``
        is ``'쉬움'`` (< 100,000 results), ``'보통'`` (< 1,000,000) or
        ``'어려움'``. Returns ``None`` when the result-count element or a
        number inside it cannot be found, or when the request fails (the
        error is printed).
    """
    import re
    # Imported locally: this snippet's import list does not include bs4.
    from bs4 import BeautifulSoup

    encoded = quote(keyword)
    url = f"https://www.google.com/search?q={encoded}"

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
    }

    try:
        response = requests.get(url, headers=headers, timeout=10)
        soup = BeautifulSoup(response.content, 'html.parser')

        # Element expected to hold text such as "약 1,234,567개".
        result_stats = soup.find('div', {'id': 'result-stats'})
        if not result_stats:
            return None

        # First run of digits and thousands separators. The pattern starts
        # with a digit so a stray comma can never be picked up as a number.
        match = re.search(r'\d[\d,]*', result_stats.text)
        if not match:
            return None

        count = int(match.group().replace(',', ''))

        # Simplified difficulty buckets.
        if count < 100000:
            difficulty = '쉬움'
        elif count < 1000000:
            difficulty = '보통'
        else:
            difficulty = '어려움'

        return {
            'keyword': keyword,
            'result_count': count,
            'difficulty': difficulty
        }

    except Exception as e:
        print(f"오류 발생: {e}")
        return None

# Usage example
keywords = ["파이썬 기초", "파이썬 프로그래밍", "파이썬 입문"]

print("키워드 난이도 분석:")
for keyword in keywords:
    result = analyze_keyword_difficulty(keyword)
    if result:
        # "\n" (not an escaped "\\n") so entries are separated by blank lines
        print(f"\n{result['keyword']}")
        print(f"  검색 결과: {result['result_count']:,}개")
        print(f"  난이도: {result['difficulty']}")

10. 키워드 트렌드 분석

시간에 따른 키워드 인기도 변화를 추적합니다.

import pandas as pd
from datetime import datetime, timedelta
import json

class KeywordTrendTracker:
    """Record, over time, whether autocomplete requests for keywords succeed."""

    def __init__(self):
        # One dict per tracking attempt; see track_keyword for the fields.
        self.history = []

    def track_keyword(self, keyword, platform='naver'):
        """Send one request for ``keyword`` and append the outcome to history.

        Args:
            keyword: Term to track.
            platform: Source to query; only ``'naver'`` is supported.

        Returns:
            ``True`` if a response was received and recorded (whatever its
            HTTP status), ``False`` if the platform is unsupported or the
            request raised.
        """
        if platform != 'naver':
            # Without this guard `url` would be unbound further down.
            print(f"추적 실패: 지원하지 않는 플랫폼입니다 ({platform})")
            return False

        # Imported locally: this snippet's import list does not include requests.
        import requests
        from urllib.parse import quote

        url = f"https://ac.search.naver.com/nx/ac?q={quote(keyword)}"

        try:
            response = requests.get(url, timeout=10)

            # Store the outcome together with the current time.
            record = {
                'timestamp': datetime.now(),
                'keyword': keyword,
                'platform': platform,
                'status': 'success' if response.status_code == 200 else 'fail'
            }

            self.history.append(record)
            return True

        except Exception as e:
            print(f"추적 실패: {e}")
            return False

    def track_multiple_keywords(self, keywords, interval_minutes=60):
        """Track several keywords repeatedly until interrupted with Ctrl+C.

        After every round the history is saved to ``keyword_trends.xlsx``.
        On Ctrl+C the history is saved once more before returning, so
        records from an unfinished round are not lost.
        """
        import time

        try:
            while True:
                for keyword in keywords:
                    self.track_keyword(keyword)
                    print(f"[{datetime.now()}] {keyword} 추적 완료")
                    time.sleep(5)  # avoid hammering the API

                # Save the results of this round.
                self.save_history('keyword_trends.xlsx')

                print(f"\n{interval_minutes}분 후 다시 추적합니다...\n")
                time.sleep(interval_minutes * 60)
        except KeyboardInterrupt:
            self.save_history('keyword_trends.xlsx')
            print("추적을 중단합니다.")

    def save_history(self, filename):
        """Write the tracking history to an Excel file; no-op when empty."""
        if not self.history:
            return

        df = pd.DataFrame(self.history)
        df.to_excel(filename, index=False, engine='openpyxl')
        print(f"✅ 추적 기록 저장: {filename}")

    def analyze_trend(self):
        """Print, per keyword, the percentage of attempts marked 'success'."""
        if not self.history:
            print("추적 기록이 없습니다.")
            return

        df = pd.DataFrame(self.history)

        # Success rate per keyword, in percent.
        success_rate = df.groupby('keyword')['status'].apply(
            lambda x: (x == 'success').sum() / len(x) * 100
        )

        print("\n키워드별 추적 성공률:")
        for keyword, rate in success_rate.items():
            print(f"  {keyword}: {rate:.1f}%")

# Usage example
tracker = KeywordTrendTracker()

# Track a single keyword
tracker.track_keyword("파이썬")

# Track several keywords (infinite loop; stop with Ctrl+C)
# keywords = ["파이썬", "자바", "자바스크립트"]
# tracker.track_multiple_keywords(keywords, interval_minutes=60)

11. 키워드 조합 생성기

롱테일 키워드를 자동으로 생성합니다.

def generate_keyword_combinations(base_keyword, modifiers):
    """Build long-tail keyword variants from a base keyword.

    Combines the base keyword with a fixed set of prefixes and suffixes,
    and with each caller-supplied modifier placed both after and before it.

    Args:
        base_keyword: Keyword to expand.
        modifiers: Iterable of extra words to attach to the base keyword.

    Returns:
        A list of unique combinations in generation order: prefix/suffix
        variants first, then the modifier variants. The order is the same
        on every run.
    """
    # Words placed in front of the base keyword ('' = no prefix).
    prefixes = ['', '최고의 ', '추천 ', '인기 ', '베스트 ']

    # Words placed after the base keyword ('' = no suffix).
    suffixes = ['', ' 추천', ' 방법', ' 가이드', ' 완벽정리', ' 정리']

    combinations = [
        f"{prefix}{base_keyword}{suffix}".strip()
        for prefix in prefixes
        for suffix in suffixes
        if prefix or suffix  # skip the bare base keyword
    ]

    for modifier in modifiers:
        combinations.append(f"{base_keyword} {modifier}")
        combinations.append(f"{modifier} {base_keyword}")

    # dict.fromkeys removes duplicates but, unlike set(), keeps first-seen
    # order, so slices such as combinations[:20] are reproducible.
    return list(dict.fromkeys(combinations))

# Usage example
base_keyword = "파이썬"
modifiers = ["기초", "입문", "초보", "강의", "책", "튜토리얼", "예제"]

combinations = generate_keyword_combinations(base_keyword, modifiers)

print(f"생성된 키워드 조합 ({len(combinations)}개):")
for i, keyword in enumerate(combinations[:20], 1):
    print(f"  {i}. {keyword}")

12. 경쟁사 키워드 분석

경쟁 사이트의 키워드를 분석합니다.

import requests
from bs4 import BeautifulSoup
from collections import Counter
import pandas as pd

class CompetitorAnalyzer:
    """Extract SEO-relevant fields and frequent nouns from competitor pages."""

    def __init__(self):
        # Imported locally: this snippet's import list does not include konlpy.
        from konlpy.tag import Okt

        self.okt = Okt()

    def analyze_competitor(self, url):
        """Download one page and summarise its title, meta tags, headings
        and most frequent nouns.

        Args:
            url: Page address.

        Returns:
            A dict with keys ``url``, ``title``, ``meta_description``,
            ``meta_keywords``, ``h1_tags``, ``h2_tags`` (first 10) and
            ``extracted_keywords`` (top 20 ``(noun, count)`` pairs), or
            ``None`` if the request or parsing fails (the error is printed).
        """
        try:
            headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
            }

            response = requests.get(url, headers=headers, timeout=10)
            # Do not analyse the body of an HTTP error page.
            response.raise_for_status()
            soup = BeautifulSoup(response.content, 'html.parser')

            analysis = {
                'url': url,
                'title': '',
                'meta_description': '',
                'meta_keywords': [],
                'h1_tags': [],
                'h2_tags': [],
                'extracted_keywords': []
            }

            # Title
            title = soup.find('title')
            if title:
                analysis['title'] = title.text.strip()

            # Meta description
            meta_desc = soup.find('meta', attrs={'name': 'description'})
            if meta_desc:
                analysis['meta_description'] = meta_desc.get('content', '')

            # Meta keywords; blank entries are dropped so an empty content
            # attribute yields [] rather than [''].
            meta_kw = soup.find('meta', attrs={'name': 'keywords'})
            if meta_kw:
                entries = meta_kw.get('content', '').split(',')
                analysis['meta_keywords'] = [kw.strip() for kw in entries if kw.strip()]

            # H1 and H2 headings (H2 limited to the first 10)
            analysis['h1_tags'] = [h1.text.strip() for h1 in soup.find_all('h1')]
            analysis['h2_tags'] = [h2.text.strip() for h2 in soup.find_all('h2')[:10]]

            # Body keywords: remove script/style elements, then count nouns
            # longer than one character.
            for tag in soup(['script', 'style']):
                tag.decompose()

            text = soup.get_text()
            nouns = [n for n in self.okt.nouns(text) if len(n) > 1]

            analysis['extracted_keywords'] = Counter(nouns).most_common(20)

            return analysis

        except Exception as e:
            print(f"분석 실패: {e}")
            return None

    def compare_competitors(self, urls):
        """Analyse several pages and return the successful results in order."""
        results = []

        for url in urls:
            print(f"분석 중: {url}")
            result = self.analyze_competitor(url)
            if result:
                results.append(result)

        return results

    def export_to_excel(self, results, filename='competitor_analysis.xlsx'):
        """Write a summary sheet plus one keyword sheet per analysed page.

        Args:
            results: List of dicts as returned by ``analyze_competitor``.
            filename: Destination ``.xlsx`` path.
        """
        with pd.ExcelWriter(filename, engine='openpyxl') as writer:
            # Summary sheet
            summary_data = [
                {
                    'URL': result['url'],
                    '제목': result['title'],
                    '메타 설명': result['meta_description'],
                    'H1 개수': len(result['h1_tags']),
                    'H2 개수': len(result['h2_tags'])
                }
                for result in results
            ]

            summary_df = pd.DataFrame(summary_data)
            summary_df.to_excel(writer, sheet_name='요약', index=False)

            # Detailed keyword sheet for each competitor
            for idx, result in enumerate(results, 1):
                if result['extracted_keywords']:
                    kw_df = pd.DataFrame(
                        result['extracted_keywords'],
                        columns=['키워드', '빈도']
                    )
                    sheet_name = f'경쟁사{idx}'[:31]  # Excel caps sheet names at 31 chars
                    kw_df.to_excel(writer, sheet_name=sheet_name, index=False)

        print(f"✅ 분석 결과 저장: {filename}")

# Usage example
analyzer = CompetitorAnalyzer()

# Competitor URLs (plain URLs; the surrounding "<...>" was markup residue
# that requests cannot open)
competitor_urls = [
    "https://example1.com",
    "https://example2.com",
    "https://example3.com"
]

# Comparative analysis
# results = analyzer.compare_competitors(competitor_urls)
# analyzer.export_to_excel(results)

13. 키워드 검색량 예측

과거 데이터를 기반으로 검색량을 예측합니다.

import pandas as pd
import numpy as np
from datetime import datetime, timedelta

def predict_keyword_volume(keyword, historical_data):
    """Forecast the next 7 days of search volume with a moving average.

    Every forecast day receives the same value: the average of the most
    recent (up to) 30 observations, truncated to an integer.

    Args:
        keyword: Keyword the data belongs to; copied into each prediction.
        historical_data: List of ``{'date': 'YYYY-MM-DD', 'volume': int}``
            dicts in any order.

    Returns:
        A list of 7 ``{'date', 'predicted_volume', 'keyword'}`` dicts for
        the days after the latest date, or ``[]`` when
        ``historical_data`` is empty.
    """
    if not historical_data:
        return []

    df = pd.DataFrame(historical_data)
    df['date'] = pd.to_datetime(df['date'])
    df = df.sort_values('date')

    # min_periods=1 makes each window average whatever rows exist so far.
    # With the default (min_periods == window) a history shorter than 30
    # rows gives NaN, and int(NaN) below would raise ValueError.
    df['ma_7'] = df['volume'].rolling(window=7, min_periods=1).mean()
    df['ma_30'] = df['volume'].rolling(window=30, min_periods=1).mean()

    # Forecast value: the latest 30-day moving average.
    last_ma_30 = df['ma_30'].iloc[-1]
    last_date = df['date'].max()

    return [
        {
            'date': (last_date + timedelta(days=offset)).strftime('%Y-%m-%d'),
            'predicted_volume': int(last_ma_30),
            'keyword': keyword
        }
        for offset in range(1, 8)
    ]

# Usage example (made-up data)
historical_data = [
    {'date': '2025-01-01', 'volume': 1000},
    {'date': '2025-01-02', 'volume': 1100},
    {'date': '2025-01-03', 'volume': 1200},
    {'date': '2025-01-04', 'volume': 1150},
    {'date': '2025-01-05', 'volume': 1300},
    {'date': '2025-01-06', 'volume': 1250},
    {'date': '2025-01-07', 'volume': 1400},
]

predictions = predict_keyword_volume("파이썬", historical_data)
# "\n" (not an escaped "\\n") so the heading starts after a blank line
print("\n검색량 예측:")
for pred in predictions:
    print(f"  {pred['date']}: {pred['predicted_volume']} 예상")

14. 키워드 감성 분석

키워드와 관련된 감정을 분석합니다.

from textblob import TextBlob
from konlpy.tag import Okt

def analyze_keyword_sentiment(keyword, texts):
    """Summarise the sentiment of the texts that mention a keyword.

    Each text containing ``keyword`` is scored with TextBlob and counted as
    positive (polarity > 0.1), negative (polarity < -0.1) or neutral.

    Args:
        keyword: Substring that a text must contain to be counted.
        texts: List of texts to inspect.

    Returns:
        A dict with ``keyword``, the three ``*_ratio`` percentages and
        ``total_mentions``, or ``None`` when no text mentions the keyword.
    """
    tally = {
        'positive': 0,
        'neutral': 0,
        'negative': 0
    }

    for sentence in texts:
        if keyword not in sentence:
            continue

        # Polarity scoring via TextBlob (the original notes it targets English).
        score = TextBlob(sentence).sentiment.polarity

        if score > 0.1:
            label = 'positive'
        elif score < -0.1:
            label = 'negative'
        else:
            label = 'neutral'
        tally[label] += 1

    mentions = sum(tally.values())
    if not mentions:
        return None

    return {
        'keyword': keyword,
        'positive_ratio': tally['positive'] / mentions * 100,
        'neutral_ratio': tally['neutral'] / mentions * 100,
        'negative_ratio': tally['negative'] / mentions * 100,
        'total_mentions': mentions
    }

# Usage example
texts = [
    "Python is an amazing programming language!",
    "I love coding with Python, it's so easy.",
    "Python can be slow for some tasks.",
    "Python is okay for beginners.",
]

result = analyze_keyword_sentiment("Python", texts)
if result:
    # "\n" (not an escaped "\\n") so the report starts after a blank line
    print(f"\n키워드: {result['keyword']}")
    print(f"긍정: {result['positive_ratio']:.1f}%")
    print(f"중립: {result['neutral_ratio']:.1f}%")
    print(f"부정: {result['negative_ratio']:.1f}%")
    print(f"총 언급: {result['total_mentions']}회")

15. 완전한 키워드 리서치 도구

모든 기능을 통합한 최종 프로젝트입니다.

import requests
from bs4 import BeautifulSoup
from konlpy.tag import Okt
from collections import Counter
import pandas as pd
from datetime import datetime
import json

class KeywordResearchTool:
    """End-to-end keyword research: autocomplete, generated combinations,
    competitor page analysis and Excel export."""

    def __init__(self):
        self.okt = Okt()
        # Filled in by the methods below and returned by comprehensive_research.
        self.results = {
            'extracted_keywords': [],
            'autocomplete_keywords': [],
            'competitor_keywords': [],
            'combined_keywords': []
        }

    def extract_from_text(self, text, top_n=20):
        """Return the ``top_n`` most frequent nouns as ``(noun, count)`` pairs.

        Single-character nouns are ignored. The result also replaces
        ``results['extracted_keywords']``.
        """
        nouns = [n for n in self.okt.nouns(text) if len(n) > 1]
        keywords = Counter(nouns).most_common(top_n)

        self.results['extracted_keywords'] = keywords
        return keywords

    def get_autocomplete(self, keyword, platform='naver'):
        """Fetch autocomplete suggestions for ``keyword``.

        Only ``platform='naver'`` triggers a request; any other value
        yields ``[]``. The result replaces
        ``results['autocomplete_keywords']``. Failures are printed and
        produce ``[]``.
        """
        from urllib.parse import quote

        keywords = []

        if platform == 'naver':
            url = f"https://ac.search.naver.com/nx/ac?q={quote(keyword)}"
            try:
                response = requests.get(url, timeout=10)
                response.raise_for_status()
                text = response.text

                # No callback is requested, so the body may be plain JSON;
                # unwrap `callback(...)` only when direct parsing fails.
                try:
                    data = json.loads(text)
                except ValueError:
                    data = json.loads(text[text.find('(') + 1:text.rfind(')')])

                groups = data.get('items')
                if groups:
                    keywords = [item[0] for item in groups[0]]
            except Exception as e:
                # Report the failure instead of silently discarding it.
                print(f"자동완성 수집 실패: {e}")

        self.results['autocomplete_keywords'] = keywords
        return keywords

    def analyze_url(self, url):
        """Download a page and return its top 30 nouns as ``(noun, count)``.

        Returns ``[]`` if the download or parsing fails (the error is
        printed).
        """
        try:
            response = requests.get(url, timeout=10)
            soup = BeautifulSoup(response.content, 'html.parser')

            # Drop script/style elements so their source is not counted.
            for tag in soup(['script', 'style']):
                tag.decompose()

            text = soup.get_text()

            return self.extract_from_text(text, top_n=30)

        except Exception as e:
            print(f"URL 분석 실패: {e}")
            return []

    def generate_combinations(self, base_keyword):
        """Return prefix, suffix and prefix+suffix variants of a keyword.

        The list is free of duplicates and always in the same order:
        prefix variants, suffix variants, then prefix+suffix variants.
        """
        prefixes = ['최고', '추천', '인기', '베스트', '완벽']
        suffixes = ['방법', '가이드', '정리', '추천', '완벽정리', '꿀팁']

        combinations = [f"{prefix} {base_keyword}" for prefix in prefixes]
        combinations += [f"{base_keyword} {suffix}" for suffix in suffixes]

        # Prefix and suffix together
        combinations += [
            f"{p} {base_keyword} {s}" for p in prefixes for s in suffixes
        ]

        # dict.fromkeys de-duplicates while keeping a reproducible order.
        return list(dict.fromkeys(combinations))

    def comprehensive_research(self, seed_keyword, urls=None):
        """Run the whole pipeline for ``seed_keyword``.

        Args:
            seed_keyword: Starting keyword.
            urls: Optional list of competitor page URLs to analyse.

        Returns:
            ``self.results`` with ``autocomplete_keywords``,
            ``competitor_keywords`` (counts merged over all ``urls``) and
            ``combined_keywords`` (unique union of every source) filled in.
        """
        print(f"🔍 '{seed_keyword}' 키워드 리서치 시작...\n")

        # 1. Autocomplete keywords
        print("1. 자동완성 키워드 수집 중...")
        autocomplete = self.get_autocomplete(seed_keyword)
        print(f"   ✅ {len(autocomplete)}개 수집\n")

        # 2. Generated combinations
        print("2. 키워드 조합 생성 중...")
        combinations = self.generate_combinations(seed_keyword)
        print(f"   ✅ {len(combinations)}개 생성\n")

        # 3. Competitor URLs (optional). analyze_url overwrites
        #    results['extracted_keywords'] on every call, so the counts are
        #    merged here to keep every page's keywords, not just the last.
        if urls:
            print("3. 경쟁사 URL 분석 중...")
            competitor_counts = Counter()
            for url in urls:
                print(f"   분석: {url}")
                for word, count in self.analyze_url(url):
                    competitor_counts[word] += count
            self.results['competitor_keywords'] = competitor_counts.most_common()
            print("   ✅ 분석 완료\n")

        # 4. Merge everything into one de-duplicated, stably ordered list.
        all_keywords = list(dict.fromkeys(
            list(autocomplete)
            + list(combinations)
            + [kw[0] for kw in self.results['extracted_keywords']]
            + [kw[0] for kw in self.results['competitor_keywords']]
        ))

        self.results['combined_keywords'] = all_keywords

        print(f"✅ 총 {len(all_keywords)}개의 키워드 수집 완료!")

        return self.results

    def export_results(self, filename='keyword_research.xlsx'):
        """Write every non-empty result list plus a summary to an Excel file."""
        with pd.ExcelWriter(filename, engine='openpyxl') as writer:
            # 1. Extracted keywords
            if self.results['extracted_keywords']:
                df1 = pd.DataFrame(
                    self.results['extracted_keywords'],
                    columns=['키워드', '빈도']
                )
                df1.to_excel(writer, sheet_name='추출키워드', index=False)

            # 2. Autocomplete keywords
            if self.results['autocomplete_keywords']:
                df2 = pd.DataFrame({
                    '키워드': self.results['autocomplete_keywords']
                })
                df2.to_excel(writer, sheet_name='자동완성', index=False)

            # 3. Combined keywords
            if self.results['combined_keywords']:
                df3 = pd.DataFrame({
                    '키워드': self.results['combined_keywords']
                })
                df3.to_excel(writer, sheet_name='통합키워드', index=False)

            # 4. Competitor keywords (only present after URL analysis)
            if self.results['competitor_keywords']:
                df4 = pd.DataFrame(
                    self.results['competitor_keywords'],
                    columns=['키워드', '빈도']
                )
                df4.to_excel(writer, sheet_name='경쟁사키워드', index=False)

            # 5. Summary (always written, so the workbook has a sheet)
            summary = pd.DataFrame({
                '항목': [
                    '추출된 키워드',
                    '자동완성 키워드',
                    '통합 키워드',
                    '리서치 일시'
                ],
                '값': [
                    len(self.results['extracted_keywords']),
                    len(self.results['autocomplete_keywords']),
                    len(self.results['combined_keywords']),
                    datetime.now().strftime('%Y-%m-%d %H:%M:%S')
                ]
            })
            summary.to_excel(writer, sheet_name='요약', index=False)

        print(f"\n✅ 결과 저장 완료: {filename}")

# Usage example
if __name__ == "__main__":
    tool = KeywordResearchTool()

    # Seed keyword
    seed_keyword = "파이썬 프로그래밍"

    # Full research run
    results = tool.comprehensive_research(
        seed_keyword,
        urls=None  # competitor URLs can be supplied here
    )

    # Print the results ("\n", not an escaped "\\n", for a real blank line)
    print("\n" + "="*50)
    print("자동완성 키워드 (상위 10개):")
    print("="*50)
    for kw in results['autocomplete_keywords'][:10]:
        print(f"  • {kw}")

    # Save to Excel
    tool.export_results('keyword_research_result.xlsx')

자주 발생하는 문제와 해결법

문제 1: 한국어 형태소 분석 오류

# Fix: try several analyzers in turn
from konlpy.tag import Okt, Kkma, Komoran

# `except Exception` rather than a bare `except:` so that Ctrl+C and
# SystemExit are not swallowed by the fallback chain.
try:
    okt = Okt()
    nouns = okt.nouns(text)
except Exception:
    try:
        kkma = Kkma()
        nouns = kkma.nouns(text)
    except Exception:
        # Last resort: plain whitespace split
        nouns = text.split()

문제 2: API 호출 제한

import time

# Fix: pause between requests
for keyword in keywords:
    # get_naver_autocomplete is the module-level helper defined earlier in
    # this guide; there is no module-level function named get_autocomplete.
    result = get_naver_autocomplete(keyword)
    time.sleep(1)  # wait one second

문제 3: 인코딩 오류

# Fix: encode the keyword explicitly
from urllib.parse import quote

keyword = "파이썬"
encoded = quote(keyword)  # percent-encode the keyword for use in a URL

마치며

파이썬으로 키워드를 추출하는 다양한 방법을 알아보았습니다. 핵심 포인트를 정리하면:

키워드 소스:

검색 엔진 자동완성 (네이버, 구글)
텍스트 분석 (TF-IDF, KoNLPy)
웹 크롤링 (메타 태그, 본문)
경쟁사 분석

추천 방법:

초보자: 네이버/구글 자동완성 API
블로거: TF-IDF + KoNLPy 조합
마케터: 종합 리서치 도구
개발자: KeyBERT + YAKE

이제 여러분도 데이터 기반 키워드 리서치 전문가입니다!