고객 여정 분석 (Customer Journey Analytics)
개요
고객 여정 분석은 고객이 브랜드와 상호작용하는 전체 경로를 추적하고 분석하는 방법론이다. 터치포인트 간 전환, 이탈 지점, 최적 경로를 파악하여 경험 최적화에 활용한다.
여정 맵핑
단계별 구조
┌─────────────────────────────────────────────────────────────────────────────┐
│                                  고객 여정                                  │
│                                                                             │
│         인지         관심         고려         구매          충성           │
│        ┌────┐       ┌────┐       ┌────┐       ┌────┐       ┌──────┐         │
│        │광고│──────▶│방문│──────▶│탐색│──────▶│전환│──────▶│재구매│         │
│        │SNS │       │검색│       │비교│       │결제│       │추천  │         │
│        └────┘       └────┘       └────┘       └────┘       └──────┘         │
│          │            │            │            │             │             │
│          ▼            ▼            ▼            ▼             ▼             │
│       이탈 10%     이탈 30%     이탈 40%     이탈 15%      이탈 5%          │
└─────────────────────────────────────────────────────────────────────────────┘
여정 데이터 모델
from dataclasses import dataclass
from datetime import datetime
from typing import List, Optional
import pandas as pd
@dataclass
class Touchpoint:
    """One recorded customer interaction (a single step of a journey).

    The example values listed next to each field come from the original
    notes; nothing in this class validates them.
    """

    # Identifier used to group touchpoints into one customer's journey.
    customer_id: str
    # When the interaction happened.
    timestamp: datetime
    # Channel label, e.g. web, app, email, store, call.
    channel: str
    # Action label, e.g. page_view, click, purchase.
    action: str
    # Journey stage label: awareness, interest, consideration, purchase, loyalty.
    stage: str
    # Session identifier; defaults to None when not supplied.
    session_id: Optional[str] = None
    # Free-form extra attributes; defaults to None when not supplied.
    properties: Optional[dict] = None
def create_journey_dataframe(touchpoints: List["Touchpoint"]) -> pd.DataFrame:
    """Convert a list of touchpoints into a journey DataFrame.

    Each touchpoint becomes one row (via ``vars``), rows are sorted by
    ``customer_id`` and ``timestamp``, and four derived columns are added:

    * ``touchpoint_seq``  - 1-based position of the row within its customer.
    * ``prev_channel``    - channel of the customer's previous row (NaN on the first).
    * ``prev_action``     - action of the customer's previous row (NaN on the first).
    * ``time_since_prev`` - hours elapsed since the customer's previous row.

    Args:
        touchpoints: Objects exposing the ``Touchpoint`` attributes.

    Returns:
        The sorted DataFrame with the derived columns. An empty input yields an
        empty DataFrame that still carries all expected column names.
    """
    records = [vars(tp) for tp in touchpoints]
    if not records:
        # pd.DataFrame([]) has no columns at all, so the sort below would
        # raise KeyError; return an empty frame with the expected schema.
        return pd.DataFrame(columns=[
            'customer_id', 'timestamp', 'channel', 'action', 'stage',
            'session_id', 'properties',
            'touchpoint_seq', 'prev_channel', 'prev_action', 'time_since_prev',
        ])

    df = pd.DataFrame(records).sort_values(['customer_id', 'timestamp'])

    # Group once and derive every per-customer column from the same grouping.
    per_customer = df.groupby('customer_id')
    hours_since_prev = (
        per_customer['timestamp'].diff().dt.total_seconds() / 3600
    )
    return df.assign(
        touchpoint_seq=per_customer.cumcount() + 1,
        prev_channel=per_customer['channel'].shift(1),
        prev_action=per_customer['action'].shift(1),
        time_since_prev=hours_since_prev,
    )
여정 분석 유형
1. 경로 분석 (Path Analysis)
from collections import Counter
from itertools import combinations
def analyze_paths(
    journeys: pd.DataFrame,
    customer_id: str = 'customer_id',
    action_col: str = 'action',
    max_path_length: int = 5
) -> dict:
    """Count the most frequent action paths across customers.

    A customer's path is the first ``max_path_length`` values of
    ``action_col`` in the row order they have inside ``journeys``
    (presumably already time-sorted - verify against the caller).

    Returns:
        A dict with ``total_journeys`` (number of customers),
        ``unique_paths`` (number of distinct paths) and ``top_paths``
        (up to ten entries with ``path``, ``count`` and ``percentage``).
    """
    actions_by_customer = journeys.groupby(customer_id)[action_col]

    # One truncated tuple of actions per customer, tallied directly.
    frequency = Counter(
        tuple(actions.tolist()[:max_path_length])
        for _, actions in actions_by_customer
    )
    journey_total = sum(frequency.values())

    ranked = []
    for steps, occurrences in frequency.most_common(10):
        share = round(occurrences / journey_total * 100, 2)
        ranked.append({
            'path': ' → '.join(steps),
            'count': occurrences,
            'percentage': share,
        })

    return {
        'total_journeys': journey_total,
        'unique_paths': len(frequency),
        'top_paths': ranked,
    }
2. 전환 퍼널 분석
def funnel_analysis(
    journeys: pd.DataFrame,
    customer_id: str = 'customer_id',
    stage_col: str = 'stage',
    stages: Optional[List[str]] = None
) -> pd.DataFrame:
    """Build a stage-by-stage conversion funnel.

    A customer "reaches" a stage when at least one of their rows has that
    value in ``stage_col``; row order is not considered.

    Args:
        journeys: One row per touchpoint.
        customer_id: Name of the customer identifier column.
        stage_col: Name of the stage column.
        stages: Ordered funnel stages. Defaults to awareness, interest,
            consideration, purchase, loyalty.

    Returns:
        DataFrame with one row per stage and the columns ``stage``,
        ``customers`` (customers who reached it), ``conversion_rate``
        (percent relative to the previous stage; 100.0 for the first stage,
        0.0 when the previous stage had nobody) and ``cumulative_rate``
        (percent of all customers; 0.0 when there are no customers).
    """
    if stages is None:
        stages = ['awareness', 'interest', 'consideration', 'purchase', 'loyalty']

    # One row per distinct (customer, stage) pair, so counting stage values
    # counts customers. Rows without a customer id are ignored, as groupby
    # would ignore them.
    reached_pairs = (
        journeys[[customer_id, stage_col]]
        .dropna(subset=[customer_id])
        .drop_duplicates()
    )
    total_customers = reached_pairs[customer_id].nunique()
    customers_per_stage = reached_pairs[stage_col].value_counts()

    funnel_rows = []
    prev_reached = None
    for stage in stages:
        reached = int(customers_per_stage.get(stage, 0))
        if prev_reached is None:
            conversion_rate = 100.0
        elif prev_reached > 0:
            conversion_rate = reached / prev_reached * 100
        else:
            conversion_rate = 0.0
        # Guard against an empty input: no customers means a 0% share
        # rather than a ZeroDivisionError.
        cumulative_rate = reached / total_customers * 100 if total_customers else 0.0
        funnel_rows.append({
            'stage': stage,
            'customers': reached,
            'conversion_rate': round(conversion_rate, 1),
            'cumulative_rate': round(cumulative_rate, 1),
        })
        prev_reached = reached

    return pd.DataFrame(
        funnel_rows,
        columns=['stage', 'customers', 'conversion_rate', 'cumulative_rate'],
    )
3. 이탈 지점 분석
def dropout_analysis(
    journeys: pd.DataFrame,
    customer_id: str = 'customer_id',
    action_col: str = 'action',
    conversion_action: str = 'purchase'
) -> dict:
    """Summarise where non-converting customers stop.

    A customer counts as converted when any of their rows has
    ``action_col == conversion_action``.

    Side effect: a boolean ``converted`` column is written onto ``journeys``
    in place. This is kept deliberately, because the attribution functions in
    this file read a ``converted`` column by default.

    Returns:
        A dict with:
        * ``dropout_points`` - last recorded action of each non-converted
          customer, mapped to how many customers ended there.
        * ``avg_journey_length_converted`` / ``avg_journey_length_non_converted``
          - mean rows per customer in each group (NaN when the group is empty).
        * ``conversion_rate`` - percent of customers who converted (0.0 when
          there are no customers).
    """
    # Classify customers as converted / non-converted.
    is_conversion = journeys[action_col] == conversion_action
    converted_customers = journeys.loc[is_conversion, customer_id].unique()
    journeys['converted'] = journeys[customer_id].isin(converted_customers)

    total_customers = journeys[customer_id].nunique()
    if total_customers == 0:
        # Nothing to analyse; avoid dividing by zero below.
        return {
            'dropout_points': {},
            'avg_journey_length_converted': float('nan'),
            'avg_journey_length_non_converted': float('nan'),
            'conversion_rate': 0.0,
        }

    converted_rows = journeys[journeys['converted']]
    non_converted_rows = journeys[~journeys['converted']]

    # Last action of every non-converted customer, in the frame's row order.
    last_actions = non_converted_rows.groupby(customer_id).last()[action_col]
    dropout_points = last_actions.value_counts()

    # Journey length (row count per customer) for each group.
    converted_length = converted_rows.groupby(customer_id).size().mean()
    non_converted_length = non_converted_rows.groupby(customer_id).size().mean()

    return {
        'dropout_points': dropout_points.to_dict(),
        'avg_journey_length_converted': round(converted_length, 1),
        'avg_journey_length_non_converted': round(non_converted_length, 1),
        'conversion_rate': round(len(converted_customers) / total_customers * 100, 2),
    }
4. 채널 기여도 분석 (Attribution)
모델 유형:
| 모델 | 설명 | 적합 상황 |
|---|---|---|
| Last Touch | 마지막 터치포인트에 100% | 단순, 직접 전환 중심 |
| First Touch | 첫 터치포인트에 100% | 인지도 중심 |
| Linear | 모든 터치포인트에 균등 분배 | 균형잡힌 분석 |
| Time Decay | 전환에 가까울수록 높은 가중치 | 구매 결정 중심 |
| Position Based | U자형 (첫/마지막 높음) | 인지+전환 중시 |
| Data-Driven | ML 기반 기여도 | 데이터 충분 시 |
import numpy as np
_ATTRIBUTION_MODELS = (
    'last_touch', 'first_touch', 'linear', 'time_decay', 'position_based',
)


def _attribution_weights(model: str, n: int):
    """Return the credit weights for a converting journey of ``n`` touchpoints."""
    if model == 'last_touch':
        return [0] * (n - 1) + [1]
    if model == 'first_touch':
        return [1] + [0] * (n - 1)
    if model == 'linear':
        return [1 / n] * n
    if model == 'time_decay':
        # Exponential decay by POSITION, not by elapsed time: a touchpoint
        # seven steps before the last one gets half the last one's weight.
        # Timestamps are not consulted here.
        decay = 0.5 ** (np.arange(n - 1, -1, -1) / 7)
        return decay / decay.sum()
    if model == 'position_based':
        # 40% to the first touch, 40% to the last, remaining 20% split evenly.
        if n == 1:
            return [1]
        if n == 2:
            return [0.5, 0.5]
        return [0.4] + [0.2 / (n - 2)] * (n - 2) + [0.4]
    raise ValueError(
        f"unknown attribution model {model!r}; expected one of {_ATTRIBUTION_MODELS}"
    )


def multi_touch_attribution(
    journeys: pd.DataFrame,
    customer_id: str = 'customer_id',
    channel_col: str = 'channel',
    conversion_col: str = 'converted',
    model: str = 'linear'
) -> pd.DataFrame:
    """Distribute one conversion per converting customer across channels.

    A customer is treated as converting when any of their rows is truthy in
    ``conversion_col``; every row of such a customer (in the frame's row
    order) shares the credit according to ``model``.

    Args:
        journeys: One row per touchpoint.
        customer_id: Name of the customer identifier column.
        channel_col: Name of the channel column.
        conversion_col: Name of the boolean conversion flag column.
        model: One of ``last_touch``, ``first_touch``, ``linear``,
            ``time_decay`` or ``position_based``.

    Returns:
        DataFrame with ``channel``, ``attributed_conversions`` and
        ``percentage``, sorted by attributed conversions in descending order.
        It is empty (but has these columns) when no customer converted.

    Raises:
        ValueError: If ``model`` is not a supported name.
    """
    # Validate up front: previously an unknown model either hit an unbound
    # ``weights`` variable or silently reused the previous journey's weights.
    if model not in _ATTRIBUTION_MODELS:
        raise ValueError(
            f"unknown attribution model {model!r}; expected one of {_ATTRIBUTION_MODELS}"
        )

    credit = {}
    for _, group in journeys.groupby(customer_id):
        if not group[conversion_col].any():
            continue
        channels = group[channel_col].tolist()
        weights = _attribution_weights(model, len(channels))
        for channel, weight in zip(channels, weights):
            credit[channel] = credit.get(channel, 0) + weight

    if not credit:
        # No converting journeys: return an empty but well-formed table
        # instead of failing on a missing column.
        return pd.DataFrame(
            columns=['channel', 'attributed_conversions', 'percentage']
        )

    result = pd.DataFrame([
        {'channel': ch, 'attributed_conversions': conv}
        for ch, conv in credit.items()
    ])
    total = result['attributed_conversions'].sum()
    result['percentage'] = result['attributed_conversions'] / total * 100
    return result.sort_values('attributed_conversions', ascending=False)
5. Markov Chain 기여도
from collections import defaultdict
def _conversion_probability(trans_probs: dict, tol: float = 1e-12,
                            max_iter: int = 10000) -> float:
    """Probability of eventually reaching ``'conversion'`` from ``'start'``.

    ``trans_probs`` maps each state to ``{next_state: probability}``. Any
    transition whose target is neither ``'conversion'`` nor a key of
    ``trans_probs`` (``'null'``, or a channel that has been removed) is
    treated as ending the journey without a conversion.

    The value is obtained by repeatedly applying ``x <- Q @ x + r`` starting
    from zero, which is deterministic and needs no matrix inversion.
    """
    states = list(trans_probs)
    if 'start' not in trans_probs:
        return 0.0
    index = {state: pos for pos, state in enumerate(states)}
    size = len(states)

    q = np.zeros((size, size))  # state -> state transition probabilities
    r = np.zeros(size)          # state -> conversion probabilities
    for state, next_states in trans_probs.items():
        row = index[state]
        for nxt, prob in next_states.items():
            if nxt == 'conversion':
                r[row] += prob
            elif nxt in index:
                q[row, index[nxt]] += prob
            # anything else: probability mass is lost (no conversion)

    reach = np.zeros(size)
    for _ in range(max_iter):
        updated = q @ reach + r
        converged = np.max(np.abs(updated - reach)) < tol
        reach = updated
        if converged:
            break
    return float(reach[index['start']])


def markov_attribution(
    journeys: pd.DataFrame,
    customer_id: str = 'customer_id',
    channel_col: str = 'channel',
    conversion_col: str = 'converted'
) -> pd.DataFrame:
    """Channel attribution from a first-order Markov chain (removal effect).

    Each customer's rows (in the frame's row order) form the path
    ``start -> channel ... -> conversion|null``, where the end state is
    ``conversion`` when any row is truthy in ``conversion_col``. Transition
    probabilities are estimated from these paths.

    A channel's removal effect is the drop in conversion probability when
    that channel is taken out of the chain and every transition into it ends
    the journey without converting. Effects are normalised to percentages.

    Compared with the earlier simulation-based version this computation is
    deterministic, does not crash when a state's only successor is the
    removed channel, and does not re-route the removed channel's traffic to
    other channels (which could produce negative effects).

    Returns:
        DataFrame with ``channel`` and ``attribution_pct`` (rounded to two
        decimals), sorted by attribution in descending order. All values are
        0 when no removal has any effect (e.g. nobody converted).
    """
    # Count observed transitions.
    transitions = defaultdict(lambda: defaultdict(int))
    for _, group in journeys.groupby(customer_id):
        outcome = 'conversion' if group[conversion_col].any() else 'null'
        path = ['start', *group[channel_col].tolist(), outcome]
        for current, following in zip(path, path[1:]):
            transitions[current][following] += 1

    # Normalise counts to probabilities.
    transition_probs = {}
    for state, next_states in transitions.items():
        total = sum(next_states.values())
        transition_probs[state] = {
            ns: count / total for ns, count in next_states.items()
        }

    base_prob = _conversion_probability(transition_probs)

    # Removal effect per channel; first-appearance order keeps ties stable.
    removal_effects = {}
    for channel in journeys[channel_col].unique():
        without_channel = {
            state: next_states
            for state, next_states in transition_probs.items()
            if state != channel
        }
        removal_effects[channel] = base_prob - _conversion_probability(without_channel)

    # Normalise effects to percentages.
    total_effect = sum(removal_effects.values())
    attribution = {
        ch: effect / total_effect * 100 if total_effect > 0 else 0
        for ch, effect in removal_effects.items()
    }

    ranked = sorted(attribution.items(), key=lambda item: -item[1])
    return pd.DataFrame(
        [{'channel': ch, 'attribution_pct': round(pct, 2)} for ch, pct in ranked],
        columns=['channel', 'attribution_pct'],
    )
여정 시각화
산키 다이어그램
import plotly.graph_objects as go
def create_sankey_diagram(
    journeys: pd.DataFrame,
    from_col: str = 'prev_action',
    to_col: str = 'action'
) -> go.Figure:
    """Build a Sankey figure of flows from ``from_col`` to ``to_col``.

    Rows are counted per ``(from_col, to_col)`` pair; each distinct value of
    either column becomes a node and each pair becomes a link whose width is
    the pair's row count.

    Nodes are numbered in order of first appearance (sources first, then
    targets). The previous ``list(set(...))`` ordering depended on string
    hash randomisation, so the node order could differ between runs.

    Returns:
        A plotly ``Figure`` titled "Customer Journey Flow".
    """
    # Aggregate transitions; dropna is kept as a safety net for rows that
    # still carry a missing endpoint after grouping.
    flow = (
        journeys.groupby([from_col, to_col])
        .size()
        .reset_index(name='count')
        .dropna()
    )
    sources = flow[from_col].tolist()
    targets = flow[to_col].tolist()

    # Deterministic, duplicate-free node list.
    node_labels = list(dict.fromkeys(sources + targets))
    node_index = {label: pos for pos, label in enumerate(node_labels)}

    fig = go.Figure(data=[go.Sankey(
        node=dict(
            pad=15,
            thickness=20,
            label=node_labels
        ),
        link=dict(
            source=[node_index[label] for label in sources],
            target=[node_index[label] for label in targets],
            value=flow['count'].tolist()
        )
    )])
    fig.update_layout(title="Customer Journey Flow")
    return fig
여정 히트맵
def journey_heatmap(
    journeys: pd.DataFrame,
    stage_col: str = 'stage',
    channel_col: str = 'channel'
) -> pd.DataFrame:
    """Return stage-by-channel shares for a heatmap.

    Rows are the values of ``stage_col``, columns the values of
    ``channel_col``. Each cell is the percentage of that stage's rows that
    came through the channel, rounded to one decimal; every row therefore
    sums to roughly 100.
    """
    # Raw touchpoint counts per (stage, channel), missing pairs filled with 0.
    pair_counts = journeys.groupby([stage_col, channel_col]).size()
    count_table = pair_counts.unstack(fill_value=0)

    # Convert each row to percentages of its own total.
    stage_totals = count_table.sum(axis=1)
    share_table = count_table.div(stage_totals, axis=0) * 100
    return share_table.round(1)
여정 세그멘테이션
여정 패턴 클러스터링
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
def cluster_journeys(
    journey_features: pd.DataFrame,
    n_clusters: int = 5
) -> "tuple[pd.DataFrame, pd.DataFrame]":
    """Cluster journeys with k-means on standardised features.

    Example feature columns (from the original notes):
      - journey_length: number of touchpoints
      - unique_channels: number of channels used
      - time_to_convert: time until conversion
      - web_ratio: share of web touchpoints
      - mobile_ratio: share of mobile touchpoints

    Args:
        journey_features: One row per journey, numeric feature columns. A
            pre-existing ``cluster`` column is ignored as a feature and
            overwritten in the result, so the function can be re-run on its
            own output.
        n_clusters: Number of k-means clusters.

    Returns:
        A ``(labeled, profiles)`` tuple: ``labeled`` is a copy of the input
        with a ``cluster`` column added (the caller's frame is left
        untouched), and ``profiles`` holds the mean of every feature per
        cluster.
    """
    # Keep a stale label column out of the feature matrix.
    features = journey_features.drop(columns=['cluster'], errors='ignore')

    scaled = StandardScaler().fit_transform(features)
    # Fixed random_state so repeated runs give the same assignment.
    labels = KMeans(n_clusters=n_clusters, random_state=42).fit_predict(scaled)

    # Work on a copy rather than mutating the caller's DataFrame.
    labeled = journey_features.copy()
    labeled['cluster'] = labels

    # Cluster profiles: per-cluster mean of each feature.
    profiles = labeled.groupby('cluster')[list(features.columns)].mean()
    return labeled, profiles
실시간 여정 추적
이벤트 스트림 처리
from datetime import datetime, timedelta
import json
class RealTimeJourneyTracker:
    """Tracks one in-memory session per customer from a stream of events.

    Events are dicts with at least ``customer_id`` and an ISO-8601
    ``timestamp`` string; ``action`` is used for the state rules when
    present. A customer's session is replaced by a new one when an event
    arrives more than ``session_timeout_minutes`` after the session's last
    activity.

    NOTE: sessions are only ever replaced per customer, never removed, so
    ``active_sessions`` grows with the number of distinct customers seen.
    """

    # Ordered rules: the first listed action found anywhere in the session
    # decides (stage, conversion probability, recommended action).
    # Simple rule-based placeholder (a real system would use an ML model).
    _STATE_RULES = (
        ('purchase', 'post_purchase', 1.0, 'cross_sell'),
        ('add_to_cart', 'purchase_intent', 0.6, 'checkout_reminder'),
        ('product_view', 'consideration', 0.3, 'product_recommendation'),
    )
    _DEFAULT_STATE = ('browsing', 0.1, 'engagement_content')

    def __init__(self, session_timeout_minutes: int = 30):
        self.active_sessions = {}
        self.session_timeout = timedelta(minutes=session_timeout_minutes)

    def process_event(self, event: dict) -> dict:
        """Add ``event`` to the customer's session and report its state.

        Returns:
            Dict with ``customer_id``, ``session_id``, ``journey_length``
            (events in the current session), ``current_stage``,
            ``conversion_probability`` and ``next_best_action``.

        Raises:
            KeyError: If ``customer_id`` or ``timestamp`` is missing.
            ValueError: If ``timestamp`` is not a valid ISO-8601 string.
        """
        customer_id = event['customer_id']
        timestamp = datetime.fromisoformat(event['timestamp'])

        session = self.active_sessions.get(customer_id)
        if session is None or timestamp - session['last_activity'] > self.session_timeout:
            # First event for this customer, or the old session timed out.
            session = self._create_session(customer_id, timestamp)
            self.active_sessions[customer_id] = session
        elif timestamp > session['last_activity']:
            # Only move the activity marker forward: a late, out-of-order
            # event must not rewind it and trigger a spurious timeout later.
            session['last_activity'] = timestamp
        session['touchpoints'].append(event)

        journey_state = self._analyze_journey_state(session)
        return {
            'customer_id': customer_id,
            'session_id': session['session_id'],
            'journey_length': len(session['touchpoints']),
            'current_stage': journey_state['stage'],
            'conversion_probability': journey_state['conversion_prob'],
            'next_best_action': journey_state['recommended_action']
        }

    def _create_session(self, customer_id: str, timestamp: datetime) -> dict:
        """Return a fresh, empty session starting at ``timestamp``."""
        return {
            'session_id': f"{customer_id}_{timestamp.isoformat()}",
            'customer_id': customer_id,
            'start_time': timestamp,
            'last_activity': timestamp,
            'touchpoints': []
        }

    def _analyze_journey_state(self, session: dict) -> dict:
        """Derive stage, conversion probability and next action for a session."""
        # Events without an 'action' key simply match no rule.
        actions = [tp.get('action') for tp in session['touchpoints']]

        stage, conversion_prob, recommended_action = self._DEFAULT_STATE
        for trigger, rule_stage, rule_prob, rule_action in self._STATE_RULES:
            if trigger in actions:
                stage = rule_stage
                conversion_prob = rule_prob
                recommended_action = rule_action
                break

        return {
            'stage': stage,
            'conversion_prob': conversion_prob,
            'recommended_action': recommended_action
        }
주요 KPI
| KPI | 설명 | 계산 |
|---|---|---|
| Avg. Journey Length | 평균 터치포인트 수 | SUM(touchpoints) / customers |
| Time to Convert | 전환까지 평균 시간 | AVG(conversion_time - first_touch) |
| Cross-Channel Rate | 크로스 채널 사용률 | multi_channel_customers / all_customers |
| Path Conversion Rate | 경로별 전환율 | conversions / path_starts |
| Attribution Efficiency | 채널 효율성 | conversions_attributed / spend |
체크리스트
- [ ] 터치포인트 데이터 수집 (모든 채널)
- [ ] 고객 ID 통합 (CDP)
- [ ] 여정 단계 정의
- [ ] 경로 분석 수행
- [ ] 이탈 지점 파악
- [ ] 어트리뷰션 모델 선택 및 적용
- [ ] 산키/퍼널 시각화
- [ ] 여정 세그먼트 생성
- [ ] 실시간 추적 구현 (선택)
참고 자료
마지막 업데이트: 2026-03-04