Java noteJava 杂谈技术干货

招聘数据分析+源码

2018-03-28  本文已影响12人  chcvn

第一次做大数据分析,过程有些不顺利,不过还好,最后还是做出了结果。大家拿到数据以后,也可以自己动手做分析。

工资较高的城市(根据百度招聘信息分析得出):

image

根据市场需求分析,开发人员的年限

image

市场人才需求分析

image

各大城市对开发经验的要求

image

数据爬取,源码

public class GetDate {
    
    /**
     * Crawls the Baidu job-search endpoint for every (keyword, city) pair and
     * hands each returned posting to {@code insert}.
     *
     * <p>For each pair, result offsets 0, 20, ..., 740 are requested. A failure on
     * one page is reported on stderr and the crawl continues with the next page.
     *
     * @param args unused
     * @throws JSONException declared for compatibility; page-level errors are
     *         caught and logged inside the loop
     */
    public static void main(String[] args) throws JSONException {
        // Two entries used to carry pasted-in category labels ("重庆 大城市;深圳",
        // "石家庄 较大的城市;南宁") and one had a leading space (" 合肥"); they are
        // split/trimmed here so every element is exactly one city name.
        String[] cities = {"北京","上海","广州","天津","武汉","沈阳","哈尔滨","西安","南京","成都","重庆","深圳","杭州","青岛","苏州","太原","郑州","济南","长春","合肥",
                "长沙","南昌","无锡","昆明","宁波","福州","石家庄","南宁","徐州","烟台","唐山","柳州","常州","鞍山","厦门","抚顺","吉林市","洛阳","大同","包头",
                "大庆","淄博","乌鲁木齐","佛山","呼和浩特","齐齐哈尔","泉州","西宁","兰州","贵阳","温州"};
        String[] keywords = {"java","python","C++",".NET","WEB前端","UI设计师","Android","IOS","PHP","C","C#","R","Swift","GO","大数据"};
        String baseUrl = "http://zhaopin.baidu.com/api/quanzhiasync?sort_type=1&detailmode=close&rn=20&pn=";

        for (String keyword : keywords) {
            System.err.println(keyword);
            // Keywords such as "C++" and "C#" contain characters that are special in
            // a URL ('+' decodes to a space, '#' starts the fragment), so they must
            // be percent-encoded before being placed in the query string.
            String encodedKeyword = urlEncode(keyword);
            for (String city : cities) {
                // City names are non-ASCII; the endpoint expects them as
                // percent-encoded UTF-8 (e.g. city=%E6%9D%AD%E5%B7%9E).
                String encodedCity = urlEncode(city);
                for (int offset = 0; offset <= 740; offset += 20) {
                    String url = baseUrl + offset + "&city=" + encodedCity + "&query=" + encodedKeyword;
                    try {
                        String payload = jsonJX(loadJSON(url));
                        JSONArray postings = new JSONArray(payload);
                        for (int i = 0; i < postings.length(); i++) {
                            JSONObject posting = (JSONObject) postings.get(i);
                            insert(posting.toString());
                        }
                    } catch (Exception e) {
                        // Keep crawling, but say which page failed and why.
                        System.err.println(".................错误................ " + url + " : " + e);
                    }
                }
            }
        }
    }

    /** Percent-encodes a query-string value as UTF-8. */
    private static String urlEncode(String value) {
        try {
            return java.net.URLEncoder.encode(value, "UTF-8");
        } catch (java.io.UnsupportedEncodingException e) {
            // UTF-8 is a charset every JVM is required to provide.
            throw new IllegalStateException("UTF-8 is not supported by this JVM", e);
        }
    }

    //存数据库
    /**
     * Stores one job posting as a row of the {@code Baidu} table.
     *
     * <p>Errors (malformed JSON, a missing key, a database failure) are reported
     * on stderr together with the offending record; nothing is thrown to the caller.
     *
     * @param json a single posting as a JSON object string; it must contain the
     *        string fields {@code jobfirstclass}, {@code joblink},
     *        {@code experience}, {@code education}, {@code employertype},
     *        {@code ori_city}, {@code salary} and {@code title}
     */
    public static void insert(String json){
        try {
            JSONObject posting = new JSONObject(json);

            // Column names double as the JSON keys, in the order used by the SQL below.
            String[] columns = {"jobfirstclass", "joblink", "experience", "education",
                    "employertype", "ori_city", "salary", "title"};
            Object[] params = new Object[columns.length];
            for (int i = 0; i < columns.length; i++) {
                params[i] = posting.getString(columns[i]);
            }

            String sql="insert into Baidu (jobfirstclass,joblink,experience,education,employertype,ori_city,salary,title) VALUES(?,?,?,?,?,?,?,?)";
            QueryRunner qr = new QueryRunner(DBUtils.getDataSource());

            // update() is the QueryRunner call for INSERT/UPDATE/DELETE statements;
            // execute() is aimed at stored-procedure calls.
            qr.update(sql, params);
        } catch (Exception e) {
            // Report which record was lost so it can be retried or inspected.
            System.err.println("Failed to store posting: " + json);
            e.printStackTrace();
        }
    }
    
    //百度json解析
    /**
     * Unwraps a Baidu job-search response down to its {@code disp_data} value.
     *
     * <p>The response is peeled one level at a time along the key path
     * {@code data -> main -> data -> disp_data}: each level is read as a string
     * and re-parsed as a JSON object. The final value is echoed to stdout.
     *
     * @param json the raw response body
     * @return the string stored under {@code disp_data}
     * @throws JSONException if any level cannot be parsed or a key is absent
     */
    public static String jsonJX(String json) throws JSONException{
        final String[] keyPath = {"data", "main", "data", "disp_data"};

        String current = json;
        for (String key : keyPath) {
            // Parse the current level and descend into the next key.
            current = new JSONObject(current).getString(key);
        }

        System.out.println(current);
        return current;
    }
    //获取Json数据
    /**
     * Downloads the resource at {@code url} and returns its text content.
     *
     * <p>The body is decoded as UTF-8 and read line by line; line terminators are
     * dropped, so the lines are concatenated directly. On failure the error is
     * reported on stderr and whatever was read so far is returned (an empty
     * string if nothing was read).
     *
     * @param url absolute URL to fetch
     * @return the concatenated lines of the response, possibly empty
     */
    public static String loadJSON (String url) {
        StringBuilder json = new StringBuilder();
        try {
            URLConnection connection = new URL(url).openConnection();
            // Bound the wait so one stalled request cannot hang the whole crawl.
            connection.setConnectTimeout(10000);
            connection.setReadTimeout(30000);

            // Decode explicitly as UTF-8 instead of the platform default charset,
            // and let try-with-resources close the stream even when reading fails.
            try (BufferedReader in = new BufferedReader(new InputStreamReader(
                    connection.getInputStream(), java.nio.charset.StandardCharsets.UTF_8))) {
                String inputLine;
                while ((inputLine = in.readLine()) != null) {
                    json.append(inputLine);
                }
            }
        } catch (MalformedURLException e) {
            System.err.println("loadJSON: malformed URL " + url + " : " + e.getMessage());
        } catch (IOException e) {
            System.err.println("loadJSON: failed to read " + url + " : " + e);
        }
        return json.toString();
    }

jar包:


image.png

工具类

public class DBUtils {

    /** Shared c3p0 connection pool, built once when this class is loaded. */
    private static ComboPooledDataSource pool = createPool();

    /**
     * Builds the pool and applies the connection and sizing settings.
     *
     * @return the configured pool
     */
    private static ComboPooledDataSource createPool() {
        ComboPooledDataSource ds = new ComboPooledDataSource();

        // Connection settings: driver, URL, user, password.
        try {
            ds.setDriverClass("com.mysql.jdbc.Driver");
        } catch (PropertyVetoException vetoed) {
            // Reported only; configuration carries on with the remaining settings.
            vetoed.printStackTrace();
        }
        ds.setJdbcUrl("jdbc:mysql://localhost:3306/stone?useUnicode=true&characterEncoding=UTF-8");
        ds.setUser("root");
        ds.setPassword("admin");

        // Pool sizing.
        ds.setAcquireIncrement(5);   // connections added per growth step
        ds.setInitialPoolSize(20);   // connections opened at start-up
        ds.setMinPoolSize(2);        // lower bound on pool size
        ds.setMaxPoolSize(50);       // upper bound on pool size

        return ds;
    }

    /**
     * Obtains a database connection from the pool.
     *
     * @return a connection from the pool
     * @throws SQLException if the pool cannot supply a connection
     */
    public static Connection getConnection() throws SQLException {
        return pool.getConnection();
    }

    /**
     * Exposes the shared c3p0 pool.
     *
     * @return the pool as a {@link DataSource}
     */
    public static DataSource getDataSource() {
        return pool;
    }
}

然后给大家看效果:
有些数据爬不到,应该是触发了百度的反爬机制,所以建议大家把上面的主方法拆开,分批运行。


image.png

源码仅供学习,禁止用于商业用途;转载请保留原文链接和作者。

上一篇 下一篇

猜你喜欢

热点阅读