1번. 임직원 로그인 빈도 분석
select
cnt as unique_logins,
count(employee_id) as employee_count
from
(select
l.employee_id,
count(l.login_id) cnt
from logins l
join employees e
on l.employee_id = e.employee_id
where
l.login_time between '2023-07' and '2023-09'
and l.login_result = 'SUCCESS'
group by l.employee_id) a
group by cnt
order by 1
⬇︎
- 수정 1) employees 테이블은 필요가 없었음 ^^a
- 수정 2) login_time(데이터형식: datetime)에 조건 거는 부분 ⭐️
- 내가 한대로 하면 login_time은 datetime형식인데 '2023-07', '2023-09'은 문자열이라 직접 비교하면 정확하게 집계가 안됨.
- '2023-07' → '2023-07-01 00:00:00' 로 변환
- '2023-09' → '2023-09-01 00:00:00' 로 변환 ➜ 2023년 9월 데이터 제외됨
※ 올바른 방법 ※
1️⃣ date_format() : 포맷된 날짜를 문자열로 반환
→ date_format(login_time,'%Y%m') between '202307' and '202309'
2️⃣ date() : datetime 형식의 값에서 날짜값만 추출, 형변환 X
→ date(login_time) between '2023-07-01' and '2023-09-30'
select
login_cnt as unique_logins,
count(employee_id) as employee_count
from
(select
employee_id,
count(1) login_cnt
from logins
where
login_result ='SUCCESS'
and date_format(login_time,'%Y-%m-%d') between '2023-07-01' and '2023-09-30'
-- date_format(login_time,'%Y%m') between '202307' and '202309'
-- date(login_time) between '2023-07-01' and '2023-09-30'
group by employee_id) a
group by login_cnt
order by 1
with success as (
select
employee_id,
count(login_id) cnt
from logins
where
login_result = 'SUCCESS'
and date_format(login_time,'%Y%m') between '202307' and '202309'
group by employee_id
)
select
cnt unique_logins,
count(1) employee_count
from success
group by cnt
order by 1
2번. 세 번째로 높은 급여를 받는 직원
select
employee_id,
name,
salary
from
(select
employee_id,
name,
salary,
dense_rank() over(order by salary desc) rnk
from employee_salary) a
where rnk = 3
order by 1
limit 출력할 행 개수(n) offset 시작 행(s)
limit 시작행(s), 출력할 행 개수(n)
☞ 해석 : s번째 행부터 시작해서 n개 출력 (offset부터 해석)
☞ 주의 : 첫 번째 행 0부터 시작
select
employee_id,
name,
salary
from employee_salary
where salary =
(
select distinct salary
from employee_salary
order by salary desc
limit 1 offset 2
)
order by 1
with third as (
select
employee_id,
name,
salary,
dense_rank() over(order by salary desc) rnk
from employee_salary
)
select
employee_id,
name,
salary
from third
where rnk = 3
order by 1
3번. 부서 간 메시지 비율 계산
select
round(sum(cnt) / count(1) * 100 , 1) as inter_department_msg_pct
from
(select
case when senderdep != recdep then 1
else 0
end as cnt
from
(select
m.sender_id,
m.receiver_id,
e1.employee_id as senderid,
e1.name as sendername,
e1.department as senderdep,
e2.employee_id as recid,
e2.name as recname,
e2.department as recdep
from messages m
join employees e1
on m.sender_id = e1.employee_id
join employees e2
on m.receiver_id = e2.employee_id) a) b
⬇︎
- 수정 1) 서브쿼리에 필요없는 컬럼은 지우자 . . (수정 3에 따라 서브쿼리 자체를 없애줌)
- 수정 2) case when에 직접 sum 붙여줄 수 있음.
- 수정 3) 조인하고 컬럼 따로 안 빼주고 바로 sum(case when ~ ) 즉, 서브쿼리 필요없음!
select
round(sum(case when se.department != re.department then 1
else 0 end)
/ count(1) * 100
,1) as inter_department_msg_pct
from messages m
join employees se
on m.sender_id = se.employee_id
join employees re
on m.receiver_id = re.employee_id
with message as (
select
se.department as sendep,
re.department as recdep
from messages m
join employees se
on m.sender_id = se.employee_id
join employees re
on m.receiver_id = re.employee_id
)
select
round(sum(case when sendep != recdep then 1
else 0 end)
/ count(1) * 100
,1) as inter_department_msg_pct
from message
4번. (도전) 광고 성과 Attribution 분석
select
user_id,
channel
from
(
select
a1.channel,
u1.user_id,
rank() over(partition by u1.user_id order by u1.created_at) rnk
from ad_attribution a1
join user_sessions u1
on a1.session_id = u1.session_id) c
where
rnk = 1
and c.user_id in (
select
user_id
from
(
select
u.user_id,
sum(a.converted) conv
from ad_attribution a
join user_sessions u
on a.session_id = u.session_id
group by u.user_id) t
where conv >= 1
)
order by user_id
⬇︎
- 수정 1) rank → row_number
- 어차피 아이디별로 최소값 구하는 거라 딱히 달라질 건 없지만, 뭔가 row_number 쓰는 게 좋을 것 같은 느낌적인 느낌
- 수정 2) converted=1 사용자만 필터링할 때 굳이 sum 안 해줘도 됨.
select
user_id,
channel
from
(
select
us.session_id,
us.user_id,
aa.channel,
row_number() over (partition by us.user_id order by us.created_at) rn
from ad_attribution aa
join user_sessions us
on aa.session_id = us.session_id
) a
where
session_id in (
select session_id
from ad_attribution
where converted = 1
)
and rn = 1
order by user_id
- 튜터님 쿼리
- 서브쿼리 대신 CTE 활용해서 조건에 만족하는 애들로만 Join 시키기 !
WITH converted_users AS ( -- 구매전환한 고객 필터링한 테이블 생성
SELECT DISTINCT us.user_id
FROM ad_attribution a
JOIN user_sessions us ON a.session_id = us.session_id
WHERE a.converted = TRUE
), first_sessions AS ( -- 위에 필터링해서 남은 애들로만 join -> 그들의 최초접속일 확인을 위한 테이블 생성
SELECT us.user_id, a.channel,
ROW_NUMBER() OVER (PARTITION BY us.user_id ORDER BY us.created_at) AS rn
FROM user_sessions us
JOIN converted_users cu ON us.user_id = cu.user_id
JOIN ad_attribution a ON us.session_id = a.session_id
)
SELECT user_id, channel
FROM first_sessions
WHERE rn = 1
ORDER BY user_id