Data Analyst KIM

#2. SQL을 이용하여 거래액 데이터 분석(select,from,where,groupby,having,orderby) 본문

데이터 분석/SQL

#2. SQL을 이용하여 거래액 데이터 분석(select,from,where,groupby,having,orderby)

김두연 2023. 5. 10. 22:21
반응형

#2. SQL을 이용하여 거래액 데이터 분석(select,from,where,groupby,having,orderby)

(출처 : '국가통계포털' - 온라인쇼핑몰 판매매체별/상품군별거래액 데이터를 활용)

 

SQL 문법의 구조를 먼저 파악을 해보자.

 

구문 순서 실행되는 순서
select [컬럼명],...          5
from [테이블명] 1
where [조건절]  2
group by [컬럼명 or 컬럼순서] 3
having [집계함수 조건절] 4
order by [컬럼명 or 컬럼순서]          6

 

--1) 데이터 탐색--------------------------------------------------------------

--STEP 1) 모든 컬럼 추출하기
SELECT *  -- 모든 컬럼
FROM gmv_trend

--STEP 2) 특정 컬럼 추출하기
SELECT category , yyyy , mm ,gmv 
FROM gmv_trend

--STEP 3) 중복값 없이 특정 컬럼 추출하기
select distinct category 
from gmv_trend gt 

select distinct yyyy , mm
from gmv_trend gt 


--2) 특정 연도의 매출 탐색--------------------------------------------------------------

--2-1) 조건이 하나일 때 More Example
------a) 숫자열 (between, 대소비교)
select *
from gmv_trend
where yyyy = 2021

select *
from gmv_trend
where yyyy >= 2019

select *
from gmv_trend
where yyyy between 2018 and 2020 --> 2018,2019,2020

select *
from gmv_trend
where yyyy != 2021 --> 2021은 제외

select *
from gmv_trend
where yyyy <> 2021 --> 2021은 제외

------b) 문자열 (=, !=, like, in, not in)
select *
from gmv_trend
where category = '가전·전자·통신기기'

select *
from gmv_trend
where category != '가전·전자·통신기기'

select *
from gmv_trend
where category in ('가전·전자·통신기기','가방') --> 2개이상일 경우 in('','')사용

select *
from gmv_trend
where category not in ('가전·전자·통신기기','가방')

select *
from gmv_trend
where category like '%패션%'  --> like '패션'이라고 하면 패션이라는 단어만 구성된 것을 불러옴
--> 왠만하면 앞뒤로 %를 붙히면 된다,

select *
from gmv_trend
where category like '%패션' 
;
select *
from gmv_trend
where category not like '%패션%'

--2-2) 조건이 여러개일 때--------------------------------------------------------------
------a) and 조건
select *
from gmv_trend
where category = '가전·전자·통신기기' and yyyy = 2021

------b) or 조건
select *
from gmv_trend
where gmv > 10000

select *
from gmv_trend
where gmv > 1000000 or gmv < 10000

------c) and , or 조건 혼용시 ()를 사용해줘야함 
select *
from gmv_trend gt 
where (gmv > 1000000 or gmv < 10000) and yyyy = 2021

--3) 카테고리별 매출 분석--------------------------------------------------------------

--More Example) 카테고리별, 연도별 매출
select category , yyyy, sum(gmv) as total_gmv 
from gmv_trend
group by category , yyyy

select category , yyyy, sum(gmv) as total_gmv 
from gmv_trend
group by 1,2

select yyyy, sum(gmv) as total_gmv 
from gmv_trend
group by 1

select yyyy, sum(gmv) as total_gmv 
from gmv_trend
group by yyyy

select category, sum(gmv) as total_gmv 
from gmv_trend
group by yyyy

select category , yyyy,mm, sum(gmv) as total_gmv 
from gmv_trend
group by 1,2,3

--More Example) 전체 총합(group_by를 하지 않는다)
select sum(gmv) as gmv , min(yyyy) , max(yyyy) , avg(gmv)
from gmv_trend 
--More Example) 집계함수의 종류
sum
min
max
avg
--기본적으로 이정도만 알면 됨

--group by + where 예시(from - where - group by 순으로 써야댐)
select category ,yyyy , sum(gmv) gmv 
from gmv_trend
where category ='의복'
group by 1,2
;
--4)매출이 높은 주요 카테고리만 확인하기--------------------------------------------------------------
select  category ,sum(gmv) gmv 
from gmv_trend
group by 1
having sum(gmv) >= 10000000 
;
--More Example) where절이랑 같이 쓰기(where과 having 차이점 ==> 집계(group by) 전/후 데이터 필터링)
select  category ,sum(gmv) gmv 
from gmv_trend
where yyyy = 2021
group by 1
having sum(gmv) >= 1300000


--5) 매출이 높은 순으로 카테고리 정렬하기--------------------------------------------------------------
select *
from gmv_trend
order by category  ,yyyy,mm,platform_type 

select category , sum(gmv) gmv  
from gmv_trend
group by 1
order by gmv

--내림차순 Example
select category , sum(gmv) gmv  
from gmv_trend
group by 1
order by gmv desc

select category , yyyy,  sum(gmv) gmv  
from gmv_trend
group by 1,2
order by 1,2 desc

select category , yyyy,  sum(gmv) gmv  
from gmv_trend
group by 1,2
order by 1,3 desc
;
--[추가 예제 1] 복수의 컬럼으로 정렬
select category , yyyy,  sum(gmv) gmv  
from gmv_trend
group by 1,2
order by 1 desc,2 desc

select category , yyyy,  sum(gmv) gmv  
from gmv_trend
group by 1,2
order by 3 desc

--[추가 예제 2] select 절에 없는 컬럼으로 정렬 가능할까? -> 불가능
select yyyy , sum(gmv) as gmv
from gmv_trend gt 
group by yyyy 
order by mm  -- 에러!!

select yyyy , sum(gmv) as gmv
from gmv_trend gt 
group by yyyy -- or 1
order by yyyy

 

반응형