数据预处理

2019-08-21  本文已影响0人  哈斯勒

1. 去重

mysql: 
# 查询员工表中涉及到的所有部门编号
SELECT DISTINCT e.`department_id` FROM employees AS e;

2. 拼串

mysql:
# 
SELECT CONCAT('a','b','c') AS result;
SELECT CONCAT(e.`last_name`,' ',e.`first_name`) AS 'name' FROM employees AS e
# select null+10   只要其中一方为null,则结果为肯定为null
SELECT CONCAT(
    e.`last_name`,' ',
    e.`first_name`,',',
    IFNULL(e.`commission_pct`, 0)
  ) AS '奖金率' 
FROM
  employees AS e ;

3. 连续值离散化

mysql:
    case
    when 条件1 then 要显示值1或语句1
    when 条件2 then 要显示值2或语句2
    else 要显示值n或语句n
    end

*/

#案例 查询员工的工资情况
/*
如果工资>20000, 显示A级别
    >15000      B
    >10000      C
否则,显示D级别
*/
SELECT last_name, department_id,salary,
CASE
WHEN salary>20000 THEN 'A'
WHEN salary>15000 THEN 'B'
WHEN salary>10000 THEN 'C'
ELSE 'D'
END AS 工资级别
FROM employees;

4. 计算非空值个数

SELECT COUNT(employee_id)FROM employees;
SELECT COUNT(commission_pct)FROM employees;
#计算不重复的非空值个数
SELECT COUNT(DISTINCT commission_pct) FROM employees;

5. 数据后面添加常数值

#案例:查询每个部门的员工个数
SELECT d.*, cc.c AS c
FROM  departments d
LEFT JOIN (
    SELECT COUNT(*) c,department_id
    FROM employees
    GROUP BY department_id
)AS cc
ON d.`department_id`=cc.department_id;

SELECT d.*,(
    SELECT COUNT(*)
    FROM employees e
    WHERE e.department_id = d.`department_id`
)AS 个数
FROM departments d;

5.获取邮箱用户名

SELECT SUBSTR('123@qq.com',1,INSTR('123@qq.com','@')-1) out_put;
上一篇下一篇

猜你喜欢

热点阅读