PythonAI算法

基于 1300+ 招聘数据分析：自学 Python 的就业门槛与要求

（综述由 AI 生成）本文通过分析拉勾网 1300+ 条 Python 招聘信息，探讨了自学 Python 的就业门槛。数据显示，北京、上海、深圳、杭州是主要需求城市，本科及以上学历及 1-5 年经验最受青睐。技能方面，除 Python 外，后端、MySQL、爬虫及算法能力是高频要求。薪资普遍在 20K-35K 区间。文章建议学习者不要仅限于语法，需结合数据库与算法，并通过项目积累实战经验，同时做好长期学习的心理准备。

2177283801发布于 2025/2/6更新于 2026/4/267 浏览

自学 Python 到什么程度能找到工作？

随着移动互联网的发展以及机器学习等热门领域带给人们的冲击，越来越多的人开始学习 Python。无论你是科班出身还是非科班转行，Python 无疑都是非常适合入门计算机世界的第一语言。其语法简洁，程序易懂，遵循「简单优雅」的哲学，在保证代码可读的基础上，用尽可能少的代码完成想法。

那么，学到什么程度可以找工作呢？实践是检验真理的唯一标准，当然得看市场需求。毕竟企业招人是来工作的，而不是让你来带薪学习的。

今天我们就试着爬取拉勾网关于 Python 的招聘信息，来看看市场到底需要什么样的人才。

一、网页结构分析

打开拉勾网首页，输入关键字「Python」，接着按 F12 打开网页调试面板，切换到「Network」选项卡下，过滤条件选上「XHR」。一切准备就绪之后点击搜索，仔细观察网页的网络请求数据。

从这些请求中我们可以大致猜测到数据好像是从 jobs/positionAjax.json 这个接口获取的。

可以看出，这些数据是通过 POST 请求获取的，Form Data 中的 pn 就是当前页码了。好了，网页分析好了，接下来就可以写爬虫拉取数据了。

import requests
import time

def headers_to_dict(headers_str):
    """Convert raw ``name: value`` header lines into a dict.

    Args:
        headers_str: Header text with one ``name: value`` pair per line, such
            as a block copied from the browser's network panel. Blank lines
            and surrounding whitespace (including ``\\r``) are ignored.

    Returns:
        A dict mapping each header name to its value, both stripped.

    Raises:
        ValueError: If a non-blank line has no ``:`` separating name and value.
    """
    headers = {}
    for raw_line in headers_str.splitlines():
        line = raw_line.strip()
        if not line:
            # Blank lines (e.g. from a triple-quoted literal) carry no header.
            continue
        # Search from index 1 so a leading colon, as in HTTP/2 pseudo-headers
        # like ":authority", stays part of the name.
        colon = line.find(':', 1)
        if colon == -1:
            raise ValueError(f"malformed header line: {raw_line!r}")
        headers[line[:colon].strip()] = line[colon + 1:].strip()
    return headers

# Endpoint the article identifies as the source of the job-listing data.
url = 'https://www.lagou.com/jobs/positionAjax.json?px=new&needAddtionalResult=false'

# Request headers, one "name: value" pair per line. The text starts and ends
# with a newline, exactly like a triple-quoted block would.
headers_str = (
    "\n"
    "accept: application/json, text/javascript, */*; q=0.01\n"
    "origin: https://www.lagou.com\n"
    "referer: https://www.lagou.com/jobs/list_python?px=new&city=%E5%85%A8%E5%9B%BD\n"
    "user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36\n"
)

# Parsed form of the headers, passed to every request below.
headers_dict = headers_to_dict(headers_str)

def get_data_from_cloud(page):
    """POST a search request for one result page and save the raw reply.

    Args:
        page: Page number, sent as the ``pn`` form field.

    The response body is passed as text to ``write_file``. The request uses a
    3-second timeout.
    """
    form = dict(first='false', pn=page, kd='python')
    reply = requests.post(url, data=form, headers=headers_dict, timeout=3)
    write_file(reply.text)

# Fetch the result pages one after another. The loop bounds were missing here
# (the statement did not parse); 76 pages with 1-based page numbers mirror the
# get_data() helper defined later in this file.
for i in range(76):
    get_data_from_cloud(i + 1)

{"success":true,"msg":null,"code":0,"content":{"showId":"8302f64","hrInfoMap":{},"status":false,"msg":"您操作太频繁，请稍后再访问",...}}

import requests
import time

def string_util_headers_to_dict(headers_str):
    """Parse a multi-line ``name: value`` header string into a dict.

    Args:
        headers_str: Header text, one pair per line. Empty lines and leading
            or trailing whitespace (including ``\\r``) are tolerated.

    Returns:
        A dict of header name to header value, both stripped of whitespace.

    Raises:
        ValueError: If a non-empty line contains no ``:`` separator.
    """
    parsed = {}
    for raw in headers_str.splitlines():
        entry = raw.strip()
        if not entry:
            # Nothing to parse; a triple-quoted literal starts with one of these.
            continue
        # Start the search at index 1 so names that begin with a colon
        # (HTTP/2 pseudo-headers such as ":path") are kept intact.
        split_at = entry.find(':', 1)
        if split_at < 0:
            raise ValueError(f"malformed header line: {raw!r}")
        name, value = entry[:split_at], entry[split_at + 1:]
        parsed[name.strip()] = value.strip()
    return parsed

# Listing page requested first (with GET) by get_data_from_cloud.
home_url = 'https://www.lagou.com/jobs/list_python?px=new&city=%E5%85%A8%E5%9B%BD'
# JSON endpoint that get_data_from_cloud POSTs the search form to.
url = 'https://www.lagou.com/jobs/positionAjax.json?px=new&needAddtionalResult=false'

# Raw request headers, one "name: value" pair per line, with a leading and a
# trailing newline.
headers_str = (
    "\n"
    "accept: application/json, text/javascript, */*; q=0.01\n"
    "origin: https://www.lagou.com\n"
    "referer: https://www.lagou.com/jobs/list_python?px=new&city=%E5%85%A8%E5%9B%BD\n"
    "user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36\n"
)

# Dict form of the headers, shared by the GET and POST requests.
headers_dict = string_util_headers_to_dict(headers_str)

def get_data_from_cloud(page):
    """Fetch one result page through a cookie-carrying session and save it.

    Args:
        page: Page number, sent as the ``pn`` form field.

    The listing page is requested first so the session picks up the site's
    cookies; the search POST then goes through the same session. The response
    text is handed to ``write_file``. Both requests use a 3-second timeout.
    """
    params = {
        'first': 'false',
        'pn': page,
        'kd': 'python'
    }
    # The context manager closes the session (and its pooled connections) even
    # if a request raises; previously a new session leaked on every call.
    with requests.Session() as s:
        # GET the listing page so the session stores the cookies it sets.
        s.get(home_url, headers=headers_dict, timeout=3)
        # Posting via the session sends those cookies automatically, so they
        # no longer need to be copied into a separate requests.post call.
        response = s.post(url, data=params, headers=headers_dict, timeout=3)
        result = response.text
    write_file(result)

def get_data():
    """Download result pages 1 through 76, sleeping 5 seconds after each."""
    for page in range(1, 77):
        get_data_from_cloud(page)
        # Pause between requests before asking for the next page.
        time.sleep(5)

import json
import pandas as pd

def get_data_from_file():
    """Load the saved responses from ``data.txt`` into a list of flat records.

    Each non-blank line of the file is parsed as one JSON response. Every
    posting under ``content.positionResult.result`` becomes a dict with the
    keys ``city``, ``industryField``, ``education``, ``workYear``, ``salary``,
    ``firstType``, ``secondType``, ``thirdType``, ``skillLables`` and
    ``companyLabelList``; the last two are joined into comma-separated strings.

    Returns:
        A list of those dicts, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a posting lacks one of the expected fields.
    """
    # Fields copied through unchanged, in output order.
    plain_fields = (
        'city', 'industryField', 'education', 'workYear',
        'salary', 'firstType', 'secondType', 'thirdType',
    )
    # Fields holding lists of labels that are flattened to "a,b,c".
    label_fields = ('skillLables', 'companyLabelList')

    data = []
    # NOTE(review): the file is read with the platform default encoding, which
    # must match whatever write_file used when saving — confirm.
    with open('data.txt') as f:
        # Iterate lazily rather than loading every line with readlines().
        for line in f:
            if not line.strip():
                # Blank or trailing empty lines are not JSON documents.
                continue
            result = json.loads(line)
            position_result = (result.get('content') or {}).get('positionResult')
            if not position_result:
                # A saved response without job listings (e.g. a rate-limit
                # notice) has nothing to extract; skip it instead of crashing.
                continue
            for item in position_result['result']:
                dict_item = {key: item[key] for key in plain_fields}
                for key in label_fields:
                    # A null label list is treated as empty.
                    dict_item[key] = ','.join(item[key] or [])
                data.append(dict_item)
    return data

# Load the cleaned records and put them into a DataFrame for the analysis.
data = get_data_from_file()
df = pd.DataFrame(data)

# Show the first 15 rows as a quick sanity check.
preview = df.head(15)
print(preview)

# The fluent chart API used in this script (add_xaxis / add_yaxis,
# opts.TitleOpts, render_notebook) belongs to pyecharts 1.x, where chart
# classes live in pyecharts.charts and the option classes in
# pyecharts.options; `from pyecharts import Bar, Pie, opts` fails there.
from pyecharts import options as opts
from pyecharts.charts import Bar, Pie

# Number of most frequent categories to show in the "top N" charts.
top = 15

# City distribution: count postings per city and keep the `top` most common.
citys_value_counts = df['city'].value_counts()
citys = list(citys_value_counts.head(top).index)
city_counts = list(citys_value_counts.head(top))

# Bar chart with cities on the x axis and posting counts on the y axis.
bar = (
    Bar()
    .add_xaxis(citys)
    .add_yaxis("", city_counts)
)
bar.render_notebook()

# Education requirement: number of postings per value of the education column.
edu_value_counts = df['education'].value_counts()
edu = list(edu_value_counts.index)
edu_counts = list(edu_value_counts)

# The pie series takes [label, count] pairs.
edu_pairs = [[label, count] for label, count in zip(edu, edu_counts)]

pie = Pie()
pie.add("", edu_pairs)
pie.set_global_opts(
    title_opts=opts.TitleOpts(title="学历分布"),
    legend_opts=opts.LegendOpts(is_show=False),
)
pie.render_notebook()

# Work experience: number of postings per value of the workYear column.
work_year_value_counts = df['workYear'].value_counts()
work_year = list(work_year_value_counts.index)
work_year_counts = list(work_year_value_counts)

# Bar chart with the experience categories on x and their counts on y.
bar = Bar()
bar.add_xaxis(work_year)
bar.add_yaxis("", work_year_counts)
bar.render_notebook()

# Industry distribution. One posting may list several comma-separated
# industries, so each field is split and every industry counted separately.
industrys = list(df['industryField'])
industry_list = []
for field in industrys:
    industry_list.extend(field.split(','))
industry_series = pd.Series(data=industry_list)
industry_value_counts = industry_series.value_counts()

# Keep only the `top` most frequent industries for the chart.
leading_industries = industry_value_counts.head(top)
industrys = list(leading_industries.index)
industry_counts = list(leading_industries)

# The pie series takes [label, count] pairs.
industry_pairs = [[label, count] for label, count in zip(industrys, industry_counts)]

pie = Pie()
pie.add("", industry_pairs)
pie.set_global_opts(
    title_opts=opts.TitleOpts(title="行业分布"),
    legend_opts=opts.LegendOpts(is_show=False),
)
pie.render_notebook()

from wordcloud import WordCloud
import matplotlib.pyplot as plt
import numpy as np

# Skill requirements: expand the comma-separated skill labels into one column
# per label, blank out the NaN padding, and dump the table as plain text.
word_data = (
    df['skillLables']
    .str.split(',')
    .apply(pd.Series)
    .replace(np.nan, '')
)
text = word_data.to_string(header=False, index=False)

# Build the word cloud from that text. The font path is a macOS system font
# file.
wc = WordCloud(
    font_path='/System/Library/Fonts/PingFang.ttc',
    background_color="white",
    scale=2.5,
    contour_color="lightblue",
)
wc = wc.generate(text)

# Show the image on a 16x9 figure without axes.
plt.figure(figsize=(16, 9))
plt.imshow(wc)
plt.axis('off')
plt.show()

# Salary levels: count postings per salary string and keep the 15 most common.
salary_value_counts = df['salary'].value_counts()
top = 15
leading_salaries = salary_value_counts.head(top)
salary = list(leading_salaries.index)
salary_counts = list(leading_salaries)

# x axis is named "薪资"; its tick labels are rotated 45 degrees.
salary_axis = opts.AxisOpts(name_rotate=0, name="薪资", axislabel_opts={"rotate": 45})

bar = Bar()
bar.add_xaxis(salary)
bar.add_yaxis("", salary_counts)
bar.set_global_opts(xaxis_opts=salary_axis)
bar.render_notebook()

# Company benefits: expand the comma-separated company labels into one column
# per label, replace the NaN padding with empty strings, and render as text.
label_columns = df['companyLabelList'].str.split(',').apply(pd.Series)
word_data = label_columns.replace(np.nan, '')
text = word_data.to_string(header=False, index=False)

# Word-cloud settings. The font path is a macOS system font file.
cloud_options = {
    'font_path': '/System/Library/Fonts/PingFang.ttc',
    'background_color': "white",
    'scale': 2.5,
    'contour_color': "lightblue",
}
wc = WordCloud(**cloud_options).generate(text)

# Show the image on a 16x9 figure without axes.
plt.figure(figsize=(16, 9))
plt.imshow(wc)
plt.axis('off')
plt.show()

基于 1300+ 招聘数据分析：自学 Python 的就业门槛与要求

自学 Python 到什么程度能找到工作？

一、网页结构分析

基于 1300+ 招聘数据分析：自学 Python 的就业门槛与要求

自学 Python 到什么程度能找到工作？

一、网页结构分析

更多推荐文章

相关免费在线工具

二、数据清洗

三、数据分析

1. 城市分布

2. 学历要求

3. 工作年限

4. 行业分布

5. 技能要求

6. 薪资水平

7. 福利待遇

四、总结与建议

更多推荐文章

相关免费在线工具

基于 1300+ 招聘数据分析：自学 Python 的就业门槛与要求

自学 Python 到什么程度能找到工作？

一、网页结构分析

基于 1300+ 招聘数据分析：自学 Python 的就业门槛与要求

自学 Python 到什么程度能找到工作？

一、网页结构分析

微信扫一扫，关注极客日志

更多推荐文章

相关免费在线工具

二、数据清洗

三、数据分析

1. 城市分布

2. 学历要求

3. 工作年限

4. 行业分布

5. 技能要求

6. 薪资水平

7. 福利待遇

四、总结与建议

微信扫一扫，关注极客日志

更多推荐文章

相关免费在线工具