Xiaonei auto-poster in Python (校内网发帖机), please don't abuse it

#!/usr/bin/python
#encoding=utf-8
# Before running: find and change the username and password below

import cookielib, urllib2, urllib, sys, time
from xml.sax.saxutils import unescape
from BeautifulSoup import BeautifulSoup          # For processing HTML

# Normalize a block of text: strip each line, drop the blank ones, and
# separate the remaining lines with blank lines
def formalize(text):
    result = ''
    lines = text.split(u'\n')
    for line in lines:
        line = line.strip()
        if len(line) == 0:
            continue
        result += line + u'\n\n'
    return result
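The helper is never actually invoked by the posting loop below; doctest-style, its effect on a messy string looks like this:

    >>> formalize(u'  a  \n\n  b ')
    u'a\n\nb\n\n'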

# Log in to Xiaonei
cj = cookielib.CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
exheaders = [("User-Agent","Mozilla/4.0 (compatible; MSIE 7.1; Windows NT 5.1; SV1)"),]
opener.addheaders=exheaders
url_login = 'http://xiaonei.com/Login.do'
body = (('email','xxxxx@gmail.com'), ('password','*********')) # TODO: replace with your own login email and password
print "ERROR! you need to update the password to be successful!"  # tripwire reminder; remove once real credentials are filled in
req1 = opener.open(url_login, urllib.urlencode(body))  # after this request the login cookie is stored in cj
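For readers on current Python: cookielib/urllib2 became http.cookiejar/urllib.request in Python 3. A minimal sketch of the same cookie-then-POST login flow, with the endpoint and form fields taken from the script above (whether xiaonei.com still accepts them is doubtful, as the comments below suggest):

import http.cookiejar, urllib.parse, urllib.request

cj3 = http.cookiejar.CookieJar()
opener3 = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj3))
opener3.addheaders = [("User-Agent", "Mozilla/4.0 (compatible; MSIE 7.1; Windows NT 5.1; SV1)")]
creds = urllib.parse.urlencode({'email': 'xxxxx@gmail.com',
                                'password': '*********'}).encode('utf-8')
resp = opener3.open('http://xiaonei.com/Login.do', creds)  # login cookie now lives in cj3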

# Fetch Qiushibaike "best" pages and post each story to the blog
body = {'relative_optype':'publisher', 'blogControl':'1'}
url_post = 'http://blog.xiaonei.com/NewEntry.do'

# Posting loop
count = 0
for i in range(11, 12):  # only page 11 here; widen the range for more pages
    url = "http://qiushibaike.com/qiushi/best/all/page/%d" % i
    data = urllib2.urlopen(url).readlines()
    soup = BeautifulSoup("".join(data))
    contents = soup.findAll('div', "content")
    stories = [str(text) for text in contents]
    for story in stories:
        count += 1
        print "processing page %d, %d items added" % (i, count)
        minisoup = BeautifulSoup(story)
        #text = ''.join([e for e in minisoup.recursiveChildGenerator() if isinstance(e, unicode)])
        #text = urllib.unquote(unescape(text, {'&quot;': '"'}))
        text = str(minisoup)
        #text = text.encode("utf-8")
        title = '糗事-%d' % count
        text += '\n来自糗事百科\n'  # append attribution line: "From Qiushibaike"
        body['title'] = title
        body['body'] = text
        req2 = opener.open(url_post, urllib.urlencode(body)) # barring surprises, the entry has been posted
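The two commented-out lines inside the loop sketch a plain-text variant: keep only the text nodes of each story instead of its raw HTML. Spelled out as a helper (same BeautifulSoup 3 / Python 2 environment; story_text is a name introduced here, not part of the original script):

def story_text(story_html):
    # Collect only the text nodes; in BeautifulSoup 3, NavigableString
    # is a unicode subclass, so the isinstance test picks them out.
    minisoup = BeautifulSoup(story_html)
    pieces = [e for e in minisoup.recursiveChildGenerator()
              if isinstance(e, unicode)]
    # Undo XML escaping; the extra entities dict maps &quot; back to "
    return unescape(u''.join(pieces), {'&quot;': u'"'})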


All comments, 7 in total:

1. jfxwc (3 years ago): Very cool code. 墨水, this is really great.

2. liufeng (2 years ago): 1) The Qiushibaike address has changed. 2) On line 49, changing count to i would be more intuitive. 3) On line 50, using the contents of url instead of the hard-coded Qiushibaike name would be more convenient. Great code; I'm learning BeautifulSoup right now and it helps a lot.

3. teloon (1 year ago): Tested it and it doesn't seem to work. 墨水, does it still run for you?

4. 半瓶墨水 (1 year ago): @teloon: I wrote this a long time ago, and liufeng reported problems with it 11 months back; feel free to patch it yourself.

5. 米兰猪 (1 year ago): Cool, bookmarked.

6. 代码疯子 (1 year ago): So Python is this sharp. Bookmarked.
http://www.programlife.net/

7. haibo600 (10 months ago): …months ago the problems had already been found
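liufeng's three suggestions amount to a small patch of the posting loop. A sketch under the same environment (the post-move Qiushibaike address is not given anywhere in the thread, so the url line still needs updating by hand):

for i in range(11, 12):
    url = "http://qiushibaike.com/qiushi/best/all/page/%d" % i  # fix 1: address has changed, update it
    data = urllib2.urlopen(url).readlines()
    soup = BeautifulSoup("".join(data))
    for story in soup.findAll('div', "content"):
        text = str(story)
        body['title'] = '糗事-%d' % i               # fix 2: page number i instead of the running count
        body['body'] = text + '\n来自 %s\n' % url   # fix 3: cite the actual source page, not a hard-coded name
        opener.open(url_post, urllib.urlencode(body))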
