爱屋吉屋页面爬下来是空的,该怎么解决?

2016-04-29 14:33:27 +08:00
 Gary_Cheung
iwjw.com 的房源页面,爬下来全部是空页面,怎么办?

headers = {
'Cookie':'IW_UUID2_COOKIES=6a2030687b904bb8bce1c6846c99b1a6; IW_UUID_COOKIES=e3e01670134042d494cd70c725c4698f; iw_user_last_housetype_h5=2; Hm_lvt_3a10bbf8b4afa9e5ab91d66b6944e813=1461846272,1461846860; Hm_lpvt_3a10bbf8b4afa9e5ab91d66b6944e813=1461909938; iw_user_last_housetype=2'
}
url = 'http://m.iwjw.com/sale/shanghai/?kw=%E5%8F%A4%E6%A1%90%E5%85%AC%E5%AF%93'
web_data = requests.get(url,headers=headers)
soup = BeautifulSoup(web_data.text,'lxml')

得到的结果:

/Library/Frameworks/Python.framework/Versions/3.5/bin/python3.5 /Users/Emma_Tang/PycharmProjects/house_to_buy/get_house_iwjw.py
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8"/>
<meta content="IE=Edge" http-equiv="X-UA-Compatible"/>
<title>二手房列表-上海--爱屋吉屋</title>
<meta content="" name="keywords"/>
<meta content="" name="description"/>
<meta content="no-cache, no-store, must-revalidate" http-equiv="Cache-Control"/>
<meta content="no-cache" http-equiv="Pragma"/>
<meta content="0" http-equiv="Expires"/>
<meta content="爱屋吉屋" name="apple-mobile-web-app-title"/>
<meta content="width=device-width,initial-scale=1.0, minimum-scale=1.0, maximum-scale=1.0, user-scalable=no" name="viewport"/>
<meta content="portrait" name="screen-orientation"/>
<meta content="portrait" name="x5-orientation"/>
<meta content="yes" name="apple-mobile-web-app-capable"/>
<link href="http://resource.iwjw.com/iwjw-h5/img/common/screen_logo.jpg" rel="apple-touch-icon-precomposed"/>
<link href="http://resource.iwjw.com/iwjw-h5/common_4.3.2.1.css" rel="stylesheet"/>
<link href="http://resource.iwjw.com/iwjw-h5/list_4.2.css" rel="stylesheet"/>
<script>
window.pageConfig = {
siteUrl: "http://www.iwjw.com",
mobileSiteUrl: 'http://m.iwjw.com',
datacollectUrl:'http://collect.iwjwagent.com/dataCollect/',
recommendUrl:'http://recommend.iwjw.com',
staticUrl: 'http://resource.iwjw.com/iwjw-h5/',
sell: 'true' == 'true' ? true : false,
rent: 'true' == 'true' ? true : false,
provincepy: "shanghai",
provinceid: "2",
locationsJson: [{"flag":"01","id":2,"lat":31.238092,"lineCode":0,"lon":121.479659,"name":"上海","shortSpell":"sh","spell":"shanghai"},{"flag":"02","id":12438,"lat":39.920176,"lineCode":0,"lon":116.407808,"name":"北京","shortSpell":"bj","spell":"beijing"},{"flag":"04","id":40000,"lat":23.135736,"lineCode":0,"lon":113.271143,"name":"广州","shortSpell":"gz","spell":"guangzhou"},{"flag":"03","id":56000,"lat":22.547923,"lineCode":0,"lon":114.063918,"name":"深圳","shortSpell":"sz","spell":"shenzhen"},{"flag":"","id":71049,"lat":30.281202,"lineCode":0,"lon":120.161596,"name":"杭州","shortSpell":"hz","spell":"hangzhou"},{"flag":"","id":71099,"lat":39.137214,"lineCode":0,"lon":117.186954,"name":"天津","shortSpell":"tj","spell":"tianjin"},{"flag":"","id":86724,"lat":32.058854,"lineCode":0,"lon":118.784457,"name":"南京","shortSpell":"nj","spell":"nanjing"},{"flag":"","id":86725,"lat":30.595037,"lineCode":0,"lon":114.291675,"name":"武汉","shortSpell":"wh","spell":"wuhan"},{"flag":"","id":98289,"lat":30.661125,"lineCode":0,"lon":104.071599,"name":"成都","shortSpell":"cd","spell":"chengdu"},{"flag":"","id":98290,"lat":29.561329,"lineCode":0,"lon":106.548784,"name":"重庆","shortSpell":"cq","spell":"chongqing"}],
ht: '2',
platform: ''
};
var _hmt = _hmt || [];
</script>
</head>
<body>
<script>
window.pageConfig = window.pageConfig || {};

window.pageConfig.list = {
provinceid: "2",
provincepy: "shanghai",
provincename: "上海",
housetype: "2",
siteUrl: "http://www.iwjw.com",
prices: '[{"key":-1,"txt":"全部"},{"key":1,"txt":"100 万以下"},{"key":2,"txt":"100-150 万"},{"key":3,"txt":"150-200 万"},{"key":4,"txt":"200-300 万"},{"key":5,"txt":"300-500 万"},{"key":6,"txt":"500-700 万"},{"key":7,"txt":"700-1000 万"},{"key":8,"txt":"1000 万以上"}]',
options: {
kw: "古桐公寓",
g: "0",
areaId: "0",
stationId: "0",
ip: "-1",
ia: "-1",
sp: "-1",
ep: "-1",
sa: "-1",
ea: "-1",
rn: "-1",
fe: "-1",
dt: "-1",
o: "0"
}
};
</script><div id="wx_pic" style="position: absolute;top: -100000px;"><img src="http://resource.iwjw.com/iwjw-h5/img/common/wx_share_logo.jpg"/></div>
<div id="iwjw">
<div class="mod-h5-container">
<div class="mod-list">
</div>
</div>
</div>
<script src="http://resource.iwjw.com/iwjw-h5/common_4.3.4.js"></script>
<script src="http://resource.iwjw.com/iwjw-h5/list_4.3.4.js"></script>
</body>
<script>
var _hmt = _hmt || [];
(function() {
var hm = document.createElement("script");
hm.src = "//hm.baidu.com/hm.js?3a10bbf8b4afa9e5ab91d66b6944e813";
hm.async = 1;
var s = document.getElementsByTagName("script")[0];
s.parentNode.insertBefore(hm, s);
})();

var _bdhmProtocol = (("https:" == document.location.protocol) ? " https://" : " http://");
document.write(unescape("%3Cscript async src='" + _bdhmProtocol + "hm.baidu.com/h.js%3Fd3a10bbf8b4afa9e5ab91d66b6944e813' type='text/javascript'%3E%3C/script%3E"));
</script></html>

Process finished with exit code 0
4351 次点击
所在节点    Python
8 条回复
seki
2016-04-29 14:46:29 +08:00
感觉房源数据是通过 Ajax 请求加载的?
用浏览器开发者工具(dev tool)看看有没有对应的 Ajax 请求,有的话改为直接抓取该请求返回的数据
Gary_Cheung
2016-04-29 14:48:05 +08:00
sorry ,我傻了,已经解决
eoo
2016-04-29 15:01:56 +08:00
。。。。。
wisonic
2016-04-29 16:01:36 +08:00
都是假房源,爬来干嘛
Gary_Cheung
2016-04-29 16:20:15 +08:00
@wisonic 纯属做个小练习 😄
xustrive
2016-04-29 18:53:43 +08:00
@Gary_Cheung 根据一楼解决的?
ayaseangle
2016-04-29 22:20:07 +08:00
大家爬来爬去。。。
Gary_Cheung
2016-04-30 17:03:31 +08:00
@xustrive 是的

这是一个专为移动设备优化的页面(即为了让你能够在 Google 搜索结果里秒开这个页面),如果你希望参与 V2EX 社区的讨论,你可以继续到 V2EX 上打开本讨论主题的完整版本。

https://www.v2ex.com/t/275320

V2EX 是创意工作者们的社区,是一个分享自己正在做的有趣事物、交流想法,可以遇见新朋友甚至新机会的地方。

V2EX is a community of developers, designers and creative people.

© 2021 V2EX