-- 创建表格emp
DROP TABLE IF EXISTS `emp`;
CREATE TABLE `emp` (
`EMPNO` int(0) NULL DEFAULT NULL,
`ENAME` varchar(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL,
`JOB` varchar(9) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL,
`MGR` int(0) NULL DEFAULT NULL,
`HIREDATE` date NULL DEFAULT NULL,
`SAL` int(0) NULL DEFAULT NULL,
`COMM` int(0) NULL DEFAULT NULL,
`DEPTNO` int(0) NULL DEFAULT NULL
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;
-- 插入数据
INSERT INTO `emp` VALUES (7566, '琼斯1', '经理', 7839, '1981-04-02', 2975, NULL, 20);
INSERT INTO `emp` VALUES (7654, '马丁', '售货员', 7698, '1981-09-28', 1250, 1400, 30);
INSERT INTO `emp` VALUES (7782, '克拉克', '经理', 7839, '1981-06-09', 2450, NULL, 10);
INSERT INTO `emp` VALUES (7788, '斯科特', '分析师', 7566, '1987-04-19', 3000, NULL, 20);
INSERT INTO `emp` VALUES (7839, '国王', '总统', NULL, '1981-11-17', 5000, NULL, 10);
INSERT INTO `emp` VALUES (7844, '特纳', '售货员', 7698, '1981-09-08', 1500, 0, 30);
INSERT INTO `emp` VALUES (7876, '亚当斯', '店员', 7788, '1987-05-23', 1100, NULL, 20);
INSERT INTO `emp` VALUES (7900, '詹姆斯', '店员', 7698, '1981-12-03', 950, NULL, 30);
INSERT INTO `emp` VALUES (7902, '福特', '分析师', 7566, '1981-12-03', 3000, NULL, 20);
INSERT INTO `emp` VALUES (7521, '沃德', '售货员', 7698, '1981-02-22', 1250, 500, 30);
INSERT INTO `emp` VALUES (7934, '米勒', '店员', 7782, '1982-01-23', 1300, NULL, 10);
INSERT INTO `emp` VALUES (7499, '艾伦', '售货员', 7698, '1981-02-20', 1600, 300, 30);
INSERT INTO `emp` VALUES (7698, '布莱克', '经理', 7839, '1981-05-01', 2850, NULL, 30);
INSERT INTO `emp` VALUES (7566, '琼斯', '经理', 7839, '1981-04-02', 2975, NULL, 20);
SELECT ename,count(1) FROM emp GROUP BY ename HAVING count(1) >1;在group by 之后 count(1)> 1代表其分组数据是重复的。
ALTER TABLE emp add id int first; -- 改变表结构,增加名为id的列
ALTER table emp MODIFY id int PRIMARY key auto_increment; -- 将id字段设置为主键自增;
SELECT min(id) id,ename,count(0) FROM emp GROUP BY ename HAVING count(1) >1;结果如下:
SELECT t1.*,t2.* from结果如下:
emp t1
inner join (SELECT min(id) id,ename,count(0) FROM emp GROUP BY ename HAVING count(1) >1) t2
on t1.ENAME = t2.ENAME;
DELETE a FROM table1 AS a LEFT JOIN table2 AS b ON 连接条件 WHERE 过滤条件;完整去重sql语句如下:
DELETE t1 from
emp t1
inner join (SELECT min(id) id,ename,count(0) FROM emp GROUP BY ename HAVING count(1) >1) t2
on t1.ENAME = t2.ENAME
where t1.id > t2.id;
免责申明:
本文系转载,版权归原作者所有,如若侵权请联系我们进行删除!
《数据治理行业实践白皮书》下载地址:https://fs80.cn/4w2atu
《数栈V6.0产品白皮书》下载地址:https://fs80.cn/cw0iw1
想了解或咨询更多有关袋鼠云大数据产品、行业解决方案、客户案例的朋友,浏览袋鼠云官网:https://www.dtstack.com/?src=bbs
同时,欢迎对大数据开源项目有兴趣的同学加入「袋鼠云开源框架钉钉技术群」,交流最新开源技术信息,群号码:30537511,项目地址:https://github.com/DTStack