# Hive setting: adds the elasticsearch-hadoop Hive connector jar to Hive's auxiliary jar path.
# NOTE(review): the jar location and version (8.8.0) are environment specific — confirm for the target cluster.
hive.aux.jars.path=file:///opt/modules/hive/auxlib/elasticsearch-hadoop-hive-8.8.0.jar
-- Template for a Hive external table that is backed by an Elasticsearch index
-- through the elasticsearch-hadoop storage handler.
-- The values of es.resource, es.nodes and es.port are placeholders to be
-- replaced with the real index name, host and port.
-- es.mapping.names pairs each Hive column with an Elasticsearch field name
-- (hive_column:es_field, comma separated).
CREATE EXTERNAL TABLE temp.es_external_table (
    fieldNameA STRING,
    fieldNameB STRING
)
STORED BY 'org.elasticsearch.hadoop.hive.EsStorageHandler'
TBLPROPERTIES (
    'es.resource' = 'es索引名',
    'es.nodes' = 'es_host',
    'es.port' = 'es_port',
    'es.mapping.names' = 'fieldNameA:fieldNameA,fieldNameB:fieldNameB'
);
## Fetch every field name defined in the mapping of the given ES index
# Inputs : es_address (host:port of Elasticsearch), index_name
# Output : field_arr, a space-separated, alphabetically sorted list of field names
# -s hides curl's progress meter; -f turns HTTP errors (for example 404 for an
# unknown index) into a non-zero exit status so an error body is never parsed.
get_index_field_ret=$(curl -sf "http://${es_address}/${index_name}?pretty=true") || {
  echo "failed to fetch the mapping of index ${index_name} from ${es_address}" >&2
  exit 1
}
# The index name is handed to jq as a variable and looked up with .[$idx], so
# names that are not valid jq identifiers (leading digit, '-' or '.') still work.
# printf with a quoted argument passes the JSON through untouched (no word
# splitting or glob expansion).
field_arr=$(printf '%s' "${get_index_field_ret}" | jq -r --arg idx "${index_name}" '.[$idx].mappings.properties | keys | join(" ")') || {
  echo "failed to read mappings.properties of index ${index_name}" >&2
  exit 1
}
# An index without mapped fields cannot be turned into a Hive table.
if [ -z "${field_arr}" ]; then
  echo "index ${index_name} has no mapped fields" >&2
  exit 1
fi
## Create the Hive external table that is backed by the ES index
# Inputs : index_name, hive_db, field_arr (space-separated field names),
#          es_host, es_port, hive_user, hive_server
# Defines: temp_table_name, temp_rename_table_name, actual_table_name
temp_table_name="temp.es_${index_name}"
temp_rename_table_name="${hive_db}.es_${index_name}_bak"
actual_table_name="${hive_db}.es_${index_name}"
### Build the column list and the es.mapping.names value in a single pass
column_defs=""
mapping_names=""
# field_arr is a space-separated string (not an array), so it is expanded
# unquoted on purpose to split it into words. A bash-style ${field_arr[@]}
# would abort with "Bad substitution" when the script is run by a POSIX sh
# such as dash.
for current_field in ${field_arr}
do
  # ${var:+...} adds the separator only from the second element on, so no
  # trailing comma has to be stripped afterwards.
  column_defs="${column_defs:+${column_defs}, }${current_field} STRING"
  mapping_names="${mapping_names:+${mapping_names},}${current_field}:${current_field}"
done
create_external_table_sql="CREATE EXTERNAL TABLE ${temp_table_name} (${column_defs}) STORED BY 'org.elasticsearch.hadoop.hive.EsStorageHandler' TBLPROPERTIES('es.resource'='${index_name}','es.nodes'='${es_host}','es.port'='${es_port}','es.mapping.names'='${mapping_names}')"
drop_external_table_sql="drop table if exists ${temp_table_name}"
echo "create external sql: ${create_external_table_sql}"
# Stop at the first failing statement instead of continuing with a missing or
# stale external table. Arguments are quoted so a JDBC URL containing '?' or
# ';' reaches beeline unchanged.
beeline -n "${hive_user}" -u "${hive_server}" -e "${drop_external_table_sql}" || exit 1
beeline -n "${hive_user}" -u "${hive_server}" -e "${create_external_table_sql}" || exit 1
## Copy the ES data into a regular (non-external) Hive staging table
# Inputs: temp_rename_table_name (staging table), temp_table_name (ES-backed
#         external table), hive_user, hive_server
create_temp_table_sql="CREATE TABLE ${temp_rename_table_name} AS SELECT * FROM ${temp_table_name}"
drop_temp_table_sql="drop table if exists ${temp_rename_table_name}"
echo "create temp table sql: ${create_temp_table_sql}"
# Abort as soon as a statement fails, so nothing that follows can run against
# a missing or partially built staging table.
beeline -n "${hive_user}" -u "${hive_server}" -e "${drop_temp_table_sql}" || exit 1
beeline -n "${hive_user}" -u "${hive_server}" -e "${create_temp_table_sql}" || exit 1
## Swap the staging table in as the user-facing table (a rename rebuilds it quickly)
# Inputs: temp_rename_table_name (staging table), actual_table_name
#         (user-facing table), hive_user, hive_server
create_actual_table_sql="ALTER TABLE ${temp_rename_table_name} RENAME TO ${actual_table_name}"
drop_actual_table_sql="drop table if exists ${actual_table_name}"
echo "create actual table sql: ${create_actual_table_sql}"
# Guard: only drop the user-facing table when the staging table really exists;
# otherwise the drop would leave users without any table and the rename would fail.
beeline -n "${hive_user}" -u "${hive_server}" -e "DESCRIBE ${temp_rename_table_name}" || {
  echo "staging table ${temp_rename_table_name} is missing, keeping ${actual_table_name}" >&2
  exit 1
}
beeline -n "${hive_user}" -u "${hive_server}" -e "${drop_actual_table_sql}" || exit 1
beeline -n "${hive_user}" -u "${hive_server}" -e "${create_actual_table_sql}" || exit 1
# es_to_hive_parent.job
# Parent job of the workflow: a command job that only prints a success
# message and names table1..table7 as its dependencies.
# NOTE(review): looks like an Azkaban job file, where the listed dependencies
# must complete before this job runs — confirm against the scheduler in use.
type=command
command=echo "es to hive success!"
dependencies=table1,table2,table3,table4,table5,table6,table7
# table1.job
# Per-index job: references the flow named ES_TO_HIVE and supplies the
# parameters for one ES index. The values of index.name and hive.db are
# placeholders (ES index name / target Hive database).
type=flow
job.name=table1
flow.name=ES_TO_HIVE
index.name=es索引名
# The key is spelled hive.db (with a dot) because ES_TO_HIVE.job reads it as
# ${hive.db}; a key named hive_db would never be substituted there.
hive.db=目标 hive 库名
# ES_TO_HIVE.job
# Command job that launches create_hive_to_es_table.sh through sh with five
# positional arguments, taken in order from the properties es.address,
# index.name, hive.server, hive.user and hive.db.
# NOTE(review): es.address, hive.server and hive.user are not defined in the
# job files shown here — confirm they are provided elsewhere (for example in
# shared flow properties).
type=command
command=sh create_hive_to_es_table.sh ${es.address} ${index.name} ${hive.server} ${hive.user} ${hive.db}
免责声明:
本文系转载,版权归原作者所有,如若侵权请联系我们进行删除!
《数据治理行业实践白皮书》下载地址:https://fs80.cn/4w2atu
《数栈V6.0产品白皮书》下载地址:https://fs80.cn/cw0iw1
想了解或咨询更多有关袋鼠云大数据产品、行业解决方案、客户案例的朋友,请浏览袋鼠云官网:https://www.dtstack.com/?src=bbs
同时,欢迎对大数据开源项目有兴趣的同学加入「袋鼠云开源框架钉钉技术群」,交流最新开源技术信息,群号码:30537511,项目地址:https://github.com/DTStack