python3学习中一些琐碎东西的存档

与其说CSDN博客是一个分享平台，不如说它是个代码归档存储仓库。

beautifulsoup的基本用法总结

import re

from bs4 import BeautifulSoup

# Quick reference. `html` is the markup to parse; `tag` stands for any tag
# name (a, p, div, ...).

# --- Basic access ---
soup = BeautifulSoup(html)  # create the BeautifulSoup object
soup.prettify()  # pretty-print the parsed document
soup.tag  # get a tag
soup.tag.name  # get the tag name
soup.tag.attrs  # get the tag's attributes
soup.tag["attrname"]  # value of one attribute; soup.tag.get("attrname") also works
soup.tag.string  # get the tag's content
soup.tag.contents  # the tag's child nodes as a list
soup.tag.get_text()  # get the text content

# --- Searching ---
soup.find_all('tag')  # find every <tag>
soup.find_all(['tag1', 'tag2'])  # find every <tag1> and <tag2>
soup.find_all(re.compile('^b'))  # search by regular expression
# Search by attribute. If the attribute name clashes with a keyword, use the
# form soup.find(attrs={"name": "sakai_csrf_token"}) instead.
soup.find_all(id='idname')
soup.find_all(id=re.compile('^a'))  # attribute + regular expression
soup.find_all(id='idname', href=re.compile('^hrefb'))  # several constraints at once
soup.find_all(text=re.compile('^abc'))  # search by content


def has_class_but_no_id(tag):
    """Return True for tags that carry a `class` attribute but no `id`."""
    return tag.has_attr('class') and not tag.has_attr('id')


soup.find_all(has_class_but_no_id)  # search with a predicate function
soup.find('tag')  # find a single match; every find_all form above works with find too

# --- Walking the tree ---
for i in soup.tag.children:  # iterate over direct children
    print(i)
for i in soup.descendants:  # iterate over all descendants
    print(i)
for i in soup.strings:  # iterate over the text content
    print(repr(i))

soup.tag.parent  # parent node
for parent in soup.tag.parents:  # iterate over all ancestors
    print(parent.name)

soup.tag.next_sibling.next_sibling  # the sibling after the next one
soup.tag.previous_sibling.previous_sibling  # the sibling before the previous one
for i in soup.tag.next_siblings:  # iterate over all following siblings
    print(i)

soup.tag.next_element  # next node; not necessarily a sibling
soup.a.previous_element  # previous node

一个爬虫的基本框架（urllib）

# -*- coding: utf-8 -*-
import urllib.request

# Target URL.
url = "https://www.douban.com/"
# Build the request.
request = urllib.request.Request(url)
# Fetch the page. The context manager closes the connection even if reading
# or decoding fails.
with urllib.request.urlopen(request) as response:
    data = response.read()
    # Decode the raw bytes.
    data = data.decode('utf-8')
    # Print the result.
    print(data)
    # Print assorted information about the fetched page.
    print(type(response))
    print(response.geturl())
    print(response.info())
    print(response.getcode())

一个爬虫的基本框架（session，微博）

# -*- coding: utf-8 -*-
import requests

# Login endpoint of the mobile Weibo site.
url = 'https://passport.weibo.cn/sso/login'

# Form fields posted to the login endpoint.
# NOTE(review): the account name and password are hard-coded here; consider
# loading them from a private file or the environment instead.
dat = {
    'username': '13269500113',
    'password': 'mima',
    'savestate': '1',
    'r': 'http://m.weibo.cn/',
    'ec': '0',
    'pagerefer': 'https://passport.weibo.cn/signin/welcome?entry=mweibo&r=http%3A%2F%2Fm.weibo.cn%2F',
    'entry': 'mweibo',
    'wentry': '',
    'loginfrom': '',
    'client_id': '',
    'code': '',
    'qq': '',
    'mainpageflag': '1',
    'hff': '',
    'hfp': '',
}

# Request headers, presumably copied from a browser capture of the login
# request (TODO confirm they are all still required).
header = {
    'Accept': '*/*',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'zh-CN,zh;q=0.8',
    'Connection': 'keep-alive',
    'Content-Length': '281',
    'Content-Type': 'application/x-www-form-urlencoded',
    #Cookie:SCF=AljbDN-Nw8b030ODeIsZ759eA7Vc_K3VPRnGqEY-2-it2vHSOz20e6iHphdYbH0sXoGX4X_HW_qjMr4RL-PeAEY.; _T_WM=35740326be0e169c0e0012349732b12f; SUHB=0oUoLaPQIcy_Mi
    'Host': 'passport.weibo.cn',
    'Origin': 'https://passport.weibo.cn',
    'Referer': 'https://passport.weibo.cn/signin/login?entry=mweibo&res=wel&wm=3349&r=http%3A%2F%2Fm.weibo.cn%2F',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36',
}

# One session is used for both requests so that cookies set by the login
# response are sent along with the follow-up GET.
session = requests.session()
response = session.post(url, data=dat, headers=header)
html = session.get('https://m.weibo.cn')
#html.encoding = 'gb2312'
#content = html.text

session例子：国科大课程监控【初稿】

# -*- coding: utf-8 -*-
"""
Created on Mon Feb 19 09:48:55 2018@author: LuSong
"""
#国科大自动选课脚本from __future__ import print_function
import re
import time
import json
import requests
from bs4 import BeautifulSoup
#from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
import smtplib
import codecs
from imp import reload
import sys
reload(sys) with open("./private.txt") as f:courses = []for i, line in enumerate(f):if i < 3: continuecourses.append(line.strip())#strip去掉换行with codecs.open(r'./private.txt', "r", 'utf-8') as f:username = password = Nonefor i, line in enumerate(f):if i == 0:line = bytes(line.encode('utf-8'))#utf-8编码后，转为字节类型if line[:3] == codecs.BOM_UTF8:#容错机制line = line[3:]username = line.decode('utf-8').strip()elif i == 1:password = line.strip()elif i == 2:mailto_list = line.strip().split()#split 按空格读入不同的邮箱else:break#mailto_list = ["lusongno1@qq.com","taoo152805@126.com"]  #目标邮箱，只有这里改成你自己的邮箱#mail_host = "smtp.163.com"
#mail_user = "lusongcool@163.com"
#mail_pass = "pswd"  # SMTP authorisation code generated by the 163 mailbox
mail_host = "smtp.126.com"
mail_user = "lusongcool@126.com"
mail_pass = "pswd"  # SMTP authorisation code generated by the mailbox provider


def send_mail(to_list, sub, content):
    """Send a plain-text UTF-8 mail through the configured SMTP server.

    Args:
        to_list: list of recipient addresses.
        sub: subject line.
        content: message body.

    Returns:
        True if the message was handed to the server, False if anything
        failed; the error is printed rather than raised.
    """
    me = "LogServer" + "<" + mail_user + ">"
    msg = MIMEText(content, _subtype='plain', _charset='utf-8')
    msg['Subject'] = sub
    msg['From'] = me
    # Addresses in a To header are separated by commas, not semicolons.
    msg['To'] = ", ".join(to_list)
    try:
        # The constructor already connects to the server, so no separate
        # connect() call is needed. The context manager closes the
        # connection even if login or sending fails.
        # Original author's note: changing the port made this work.
        with smtplib.SMTP(mail_host, 25) as server:
            #server.set_debuglevel(1)
            server.login(mail_user, mail_pass)
            server.sendmail(me, to_list, msg.as_string())
        return True
    except Exception as e:
        # Best effort: a failed notification must not stop the caller.
        print(str(e))
        return False
# Variable initialisation.
session = None
headers = None
jwxk_html = None
#course = [['021M2028H', '0'], ['021M2028H', '1']]
#username = 'lusongcool@163.com'
#password = 'pswd'
#cnt = 0
#__BEAUTIFULSOUPPARSE = 'html5lib'

# Log in to the system.
session = requests.session()
login_url = 'http://onestop.ucas.ac.cn/Ajax/Login/0'  # form submission address; this one needs no captcha
# NOTE(review): the same headers, including this Host value, are reused below
# for requests to sep.ucas.ac.cn and jwxk.ucas.ac.cn - confirm that is intended.
headers = {
    'Host': 'onestop.ucas.ac.cn',
    "Connection": "keep-alive",
    'Referer': 'http://onestop.ucas.ac.cn/home/index',
    'X-Requested-With': 'XMLHttpRequest',
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36",
}
post_data = {
    "username": username,
    "password": password,
    "remember": 'checked',
}
html = session.post(login_url, data=post_data, headers=headers).text
res = json.loads(html)
# The login page, the submission address and the returned address are three
# different things; open the returned address here.
html = session.get(res['msg']).text

# Use the Identity token to enter the course-selection system.
# Step 1: fetch the page that carries the Identity token.
url = "http://sep.ucas.ac.cn/portal/site/226/821"
r = session.get(url, headers=headers)
#f = open('r.html','w+',encoding='utf-8')
#f.write(r.text)
#f.close
# Stop at the closing quote so that a second quoted string on the same line
# cannot be captured along with the token.
identity_matches = re.findall(
    r'"http://jwxk\.ucas\.ac\.cn/login\?Identity=([^"]*)"', r.text)
if not identity_matches:
    raise RuntimeError('Identity token not found in the portal page')
code = identity_matches[0]

# Step 2: open the course-selection system.
url = "http://jwxk.ucas.ac.cn/login?Identity=" + code
#headers['Host'] = "jwxk.ucas.ac.cn"
r = session.get(url, headers=headers)
temp = r.text
#f = open('temp.html','w+',encoding='utf-8')
#f.write(temp)
#f.close

#url = 'http://jwxk.ucas.ac.cn/courseManage/main'
#r = session.get(url, headers=headers)
#jwxk_html = r.text
#f = open('jwxk_html.html','w+',encoding='utf-8')
#f.write(jwxk_html)
#f.close

# Compile each course pattern once instead of on every polling round.
course_patterns = [re.compile(course) for course in courses]

count = 0
while True:
    time.sleep(1)
    count = count + 1
    print(count)
    url = 'http://jwxk.ucas.ac.cn/course/termSchedule'
    r = session.get(url, headers=headers)
    jwxk_html = r.text
    #f = open('termSchedule.html','w+',encoding='utf-8')
    #f.write(jwxk_html)
    #f.close
    soup = BeautifulSoup(jwxk_html, 'lxml')
    #print(soup.prettify())
    #f = open('soupprettify.html','w+',encoding='utf-8')
    #f.write(soup.prettify())
    #f.close
    soup = soup.table
    if soup is None:
        # No table in the response; try again on the next round.
        print('schedule table not found in response')
        continue
    #courses = ['23MGB003H-21']  # change this to the course codes you want to monitor
    for course in course_patterns:
        course_ind = soup.find_all(target='_blank', string=course)
        #course_ind = soup.find_all(string=course)
        if not course_ind:
            # Course not listed; skip it instead of crashing the monitor.
            print('course not found in schedule: ' + course.pattern)
            continue
        # The matched link's grandparent is taken as the course's table row.
        course_info = course_ind[0].parent.parent
        infomation = course_info.find_all('td')
        # Presumably column 6 is the enrolment limit, column 7 the current
        # enrolment and column 2 the course name - TODO confirm against the page.
        lim_num = int(infomation[6].string)
        num = int(infomation[7].string)
        item = infomation[2].string
        course_left = lim_num - num
        if course_left > 0:
            # NOTE(review): a mail is sent on every round while seats remain.
            #flag = send_mail(mailto_list,'nihao','haoya')
            flag = send_mail(mailto_list, item + '课程可选', course_info.text + '\n\n' + '余量为：' + str(course_left))
            if flag:
                print('有课余量，发送成功！' + item + '余量为：' + str(course_left))
            else:
                print('发送邮件失败！')

#html = jwxk_html
#regular = r'<label for="id_([\S]+)">' + course[0][0][:2] + r'-'
#institute_id = re.findall(regular, html)[0]
#url = 'http://jwxk.ucas.ac.cn' + \
#              re.findall(r'<form id="regfrm2" name="regfrm2" action="([\S]+)" \S*class=', html)[0]
#post_data = {'deptIds': institute_id, 'sb': '0'}
#
#html = session.post(url, data=post_data, headers=headers).text

学习语言最好的方式是去看代码，然后动手去尝试体会，而不是看一些杂七杂八的文字总结和所谓的视频教程。善于利用百度，你也就成功了一半。

python3学习中一些琐碎东西的存档相关推荐

实验楼python3中挑战一_实验楼python3学习挑战项目
实验楼python3学习挑战项目在实验楼学习python3做的挑战项目做的代码整理. 链接:https://www.shiyanlou.com/courses/596 1.圆的面积题目: 能够计算 ...
【ML/DL】python3学习《机器学习实战》书中的报错及解决办法
python3学习<机器学习实战>书中的报错及解决办法(更新中) <机器学习实战>是一本很不错的书,其采用的是py2的语法格式,在用最新的python3.6编写代码的过程中,书 ...
如何迅速有效学习web前端开发？在学习中你更应该注重哪些东西
什么是高效率学习? 一:追求学习的性价比学习性价比 = 所学到的有用的知识 ÷ 花费的时间 **ps:**如果你用了一年时间,还不能仿站,显然效率是低的. 想要学习性价比高,就尽力向这两点努力: * ...
8个计算机视觉深度学习中常见的Bug
点击上方"小白学视觉",选择加"星标"或"置顶" 重磅干货,第一时间送达本文转自|人工智能与算法学习导读给大家总结了8个计算机视觉深度 ...
Deepmind讲座:深度学习中的记忆和注意力注意力机制发展史与详解
DeepMind x UCL | Deep Learning Lectures | 8/12 | Attention and Memory in Deep Learning(机翻讲稿) 图片外链失效, ...
iOS开发几年了,你清楚OC中的这些东西么!!!?
iOS开发几年了,你清楚OC中的这些东西么!!!? 前言几年前笔者是使用Objective-C进行iOS开发, 不过在两年前Apple发布swift的时候,就开始了swift的学习, 在swift1 ...
深度学习中的注意力机制（二）
作者 | 蘑菇先生来源 | NewBeeNLP 目前深度学习中热点之一就是注意力机制(Attention Mechanisms).Attention源于人类视觉系统,当人类观察外界事物的时候,一般不 ...
深度学习中GPU和显存分析
点击上方"小白学视觉",选择加"星标"或"置顶" 重磅干货,第一时间送达转自:机器学习AI算法工程深度学习最吃机器,耗资源,在本文,我将 ...
深度学习中的优化算法与实现
点击上方"3D视觉工坊",选择"星标" 干货第一时间送达 GiantPandaCV导语:这篇文章的内容主要是参考沐神的mxnet/gluon视频中,Aston ...

python3学习中一些琐碎东西的存档

beautifulsoup的基本用法总结

一个爬虫的基本框架（urllib）

一个爬虫的基本框架（session，微博）

session例子：国科大课程监控【初稿】

python3学习中一些琐碎东西的存档相关推荐

最新文章

热门文章