-
문제1VERTICA/06. Tuning 2017. 2. 13. 11:35-- 조인결과 row수 대략 7~8천억건이 적당 (약5~7 년전 pc 급 3대 클러스터 기준)SELECT count(*)FROM online_sales.ytkim_target AS TARGET INNER JOIN online_sales.ytkim_target AS adjoinON (adjoin.product_key = TARGET.product_key AND adjoin.customer_key <> TARGET.customer_key)-- 아래의 SQL을 개선하는 방법을 찾는 문제 입니다.(그룹함수의 결과는 큰의미가 없습니다.adjoin.customer_key <> TARGET.customer_key 조건절 때문에 발생하는 비효율을 없에는 것이 포인트 입니다.)SELECT TARGET.customer_key AS target_customer_key, adjoin.customer_key AS adjoin_customer_key, count(*), sum(TARGET.sales_dollar_amount), sum(sqrt(TARGET.gross_profit_dollar_amount))FROM online_sales.ytkim_target AS TARGET INNER JOIN online_sales.ytkim_target AS adjoinON (adjoin.product_key = TARGET.product_key AND adjoin.customer_key <> TARGET.customer_key)GROUP BY TARGET.customer_key, adjoin.customer_key--기본제공하는 vertica vmart 에서 데이터 rows만 변경하여 samfile 추출판매이력 테이블을 쓸예정인데기본데이터는 너무 적으므로 초기데이터 4000만건으로 시작실행방법은 바로 아래에...cd /opt/vertica/examples/VMart_Schema./vmart_gen --customer_dimension 20000000 --product_dimension 300000 --online_sales_fact 40000000 --store_dimension 20000 --store_sales_fact 40000000 --years 2015-2016[ --datadirectory <dir-data-files> ][ --files <fact-table-load-files> ][ --null <character-to-indicate-null-value> ][ --seed <for-random-number-generator> ][ --time_file <data-for-Date_Dimension> ][ --store_sales_fact <rows> ][ --store_orders_fact <rows> ][ --product_dimension <rows> ][ --store_dimension <rows> ][ --promotion_dimension <rows> ][ --vendor_dimension <rows> ][ --customer_dimension <rows> ][ --employee_dimension <rows> ][ --warehouse_dimension <rows> ][ --shipping_dimension <rows> ][ --online_page_dimension <rows> ][ --callcenter_dimension <rows> ][ --online_sales_fact <rows> ][ --inventory_fact <rows> ][ --gen_load_script ][ --years startYear-endYear ]-- 대상 테이블 생성vsql 상에서 아래구문수행\i vmart_schema_drop.sql --스키마삭제\i vmart_define_schema.sql --스키마생성\i vmart_load_data.sql --데이터적재-- 원본은 훼손하지말고 online_sales.online_sales_fact 복제하여 test합시다.(재작업시 원본이 있으니 다시 할 수 있음)drop table online_sales.ytkim_target cascade;create table online_sales.ytkim_target as select * from online_sales.online_sales_fact ;-- 랜덤하게 만들어진 데이터이므로 그중 자주사는사람과 자주팔리는 상품 데이터를 추가생성-- 자주사는사람 약 100000명 정도, 많이 팔리는 상품 약 100개 정도를 의도함create TEMPORARY table yt1 on COMMIT PRESERVE rows ASselect * from online_sales.ytkim_target limit 1create TEMPORARY table yt2 on commit PRESERVE rows asselect *from(select * from (select distinct customer_key from online_sales.ytkim_target awhere a.customer_key < 100000) customer cross join(select distinct product_key from online_sales.ytkim_target awhere a.product_key < 100) product) ainsert /*+ direct */ into online_sales.ytkim_targetselectsale_date_key,ship_date_key,yt2.product_key,product_version,yt2.customer_key,call_center_key,online_page_key,shipping_key,warehouse_key,promotion_key,pos_transaction_number,sales_quantity,sales_dollar_amount,ship_dollar_amount,net_dollar_amount,cost_dollar_amount,gross_profit_dollar_amount,transaction_typefrom yt2 cross join yt1commit;-- 조인결과 row수 대략 7~8천억건이 적당 (약5~7 년전 pc 3대 클러스터 기준)SELECT count(*)FROM online_sales.ytkim_target AS TARGET INNER JOIN online_sales.ytkim_target AS adjoinON (adjoin.product_key = TARGET.product_key AND adjoin.customer_key <> TARGET.customer_key)-- 자 데이터는 준비가 되었으니 해당sql에 대한 성능개선을 해주세요.(모든방법을 동원하여~~)SELECT TARGET.customer_key AS target_customer_key, adjoin.customer_key AS adjoin_customer_key, count(*), sum(TARGET.sales_dollar_amount), sum(sqrt(TARGET.gross_profit_dollar_amount))FROM online_sales.ytkim_target AS TARGET INNER JOIN online_sales.ytkim_target AS adjoinON (adjoin.product_key = TARGET.product_key AND adjoin.customer_key <> TARGET.customer_key)GROUP BY TARGET.customer_key, adjoin.customer_key
8.0.1에서 TEST 하였습니다
'VERTICA > 06. Tuning' 카테고리의 다른 글
버티카 성능 진단 (0) 2016.12.14 Explain 과 Profile (0) 2015.08.06 댓글