如何获取这段 HTML 源码中的这两个企业的信息

2020-10-25 11:11:03 +08:00
 songdg
<html>
<head>
<title>综合业务查询结果</title>
<meta http-equiv="Content-Type" content="text/html; charset=GBK">
<link rel="stylesheet" href="/aic/skins/1/style.css" type="text/css">
<script type="text/javascript" src="/aic/common/check.js"></script>
<script language=javascript>
/**
 * Ask the user for confirmation and, if given, close the current window.
 */
function Check() {
    var confirmed = confirm("是否关闭窗口?");
    if (!confirmed) {
        return;
    }
    window.close();
}

/**
 * Open an entity detail page in a 980x620 popup centred on the available screen area.
 *
 * @param {string} htmlurl  Detail page URL. "&corpName=..." is appended verbatim, so the
 *                          URL is expected to already carry a query string.
 * @param {string} corpName Entity name; run through replaceSpecial() (defined outside this
 *                          block) before being appended to the URL.
 */
function call(htmlurl, corpName) {
    var width = 980;
    var height = 620;
    var left = (screen.availWidth - width) / 2;
    var topPos = (screen.availHeight - height) / 2;
    var features = "top=" + topPos + ",left=" + left
        + ",scrollbars=yes,toolbar=no,menubar=no,status=no,resizable=yes,"
        + "width=" + width + ",height=" + height;
    var newwin = window.open(htmlurl + "&corpName=" + replaceSpecial(corpName), "EntityInfo", features);
    // window.open() returns null when the popup is blocked; only focus a real window.
    if (newwin) {
        newwin.focus();
    }
}
/**
 * Toggle the element with id "info" between inline display "block" and "none".
 *
 * Original note (translated): when the search returns no result, print the business
 * registration query status and fill in the applicant name and enterprise name.
 *
 * Any other inline display value (including an unset one) is left untouched, as before.
 * Does nothing when the page has no "info" element.
 */
function editInfo() {
    var table = document.getElementById("info");
    if (!table) {
        // The panel is not rendered on every page; avoid a TypeError on table.style.
        return;
    }
    if (table.style.display == 'none') {
        table.style.display = "block";
    } else if (table.style.display == 'block') {
        table.style.display = "none";
    }
}
/**
 * Validate that the "corpName" and "legName" fields both contain non-whitespace text.
 * Shows an alert and returns false when either one is empty or whitespace-only.
 *
 * @returns {boolean} true when both fields are filled in, false otherwise.
 */
function checkform2() {
    var corpNameValue = document.getElementById("corpName").value;
    var legNameValue = document.getElementById("legName").value;
    var isBlank = function (text) {
        return text == "" || text.replace(/\s+/g, "") == "";
    };
    if (!isBlank(corpNameValue) && !isBlank(legNameValue)) {
        return true;
    }
    alert("请填写企业名称和申请人名称");
    return false;
}
</SCRIPT>
</head>

<body bgcolor="#FFFFFF" text="#000000" leftmargin="0" topmargin="0">
<br>

<link rel="stylesheet" href="/aic/skins/1/style.css" type="text/css">

<script src=/aic/com/Tantom/Tantom.js></script>
<script>
// Request four Tantom modules (JDom, Checker, StyleUtil, SampleHelp) through system.Import.
// `system` is not defined in this file; presumably it comes from the Tantom.js script
// included immediately before this block (TODO confirm).
system.Import("com.tantom.js.dom.JDom");
system.Import("com.tantom.js.util.Checker");
system.Import("com.tantom.js.util.StyleUtil");
system.Import("com.tantom.js.help.SampleHelp");
</script>
<script language=javascript>
/**
 * Open the business help page in a popup window named "help".
 *
 * NOTE(review): a later script on this page declares openBizHelp again, and the last
 * declaration is the one that runs.
 *
 * @param entityType Value sent as the "entityType" query parameter.
 * @param acceptType Value sent as the "acceptType" query parameter.
 * @param actType    Value sent as the "actType" query parameter.
 */
function openBizHelp(entityType, acceptType, actType) {
    var query = [
        "entityType=" + entityType,
        "acceptType=" + acceptType,
        "actType=" + actType,
        "url=" + document.location
    ].join("&");
    var features = "top=150,left=150,toolbar=no,location=no,directories=no,status=no,menubar=no,scrollbars=no,resizable=yes,width=800,height=600";
    var helpWindow = window.open("/aic/Public/BizHelp.jsp?" + query, "help", features);
    helpWindow.focus();
}

/**
 * Open the result print page in an unnamed 800x400 popup and give it focus.
 */
function openSRPrint() {
    var printUrl = "/aic/Public/ResultPrint.jsp";
    var features = "top=150,left=150,toolbar=no,location=no,directories=no,status=no,menubar=no,scrollbars=no,resizable=yes,width=800,height=400";
    var printWindow = window.open(printUrl, "", features);
    printWindow.focus();
}
</SCRIPT>
<script src=/aic/aicexi/common/Tantom/Tantom.js></script>
<script>
// Same four system.Import calls that appear earlier on this page. They follow a second
// Tantom.js include (from /aic/aicexi/common/Tantom/).
// NOTE(review): looks like a duplicated include fragment; confirm whether both are needed.
system.Import("com.tantom.js.dom.JDom");
system.Import("com.tantom.js.util.Checker");
system.Import("com.tantom.js.util.StyleUtil");
system.Import("com.tantom.js.help.SampleHelp");
</script>
<script src="/aic/aicexi/common/WindowManager.js"></script>
<link rel="stylesheet" href="/aic/skins/1/style.css" type="text/css">
<script language=javascript>
/**
 * Open the business help page in the shared "newWin_searchout" popup.
 *
 * JavaScript has no overloading: two declarations with the same name leave only the last
 * one callable. Both historical call shapes are therefore handled in this one function:
 *
 *   openBizHelp(helpID)                          -> BizHelp.jsp?helpID=...
 *   openBizHelp(entityType, acceptType, actType) -> BizHelp.jsp?entityType=...&acceptType=...&actType=...&url=...
 *
 * @param entityType Help ID when called with fewer than three arguments; otherwise the
 *                   value sent as the "entityType" query parameter.
 * @param acceptType Value sent as the "acceptType" query parameter (three-argument form).
 * @param actType    Value sent as the "actType" query parameter (three-argument form).
 */
function openBizHelp(entityType, acceptType, actType) {
    var query;
    if (arguments.length >= 3) {
        query = "entityType=" + entityType + "&acceptType=" + acceptType + "&actType=" + actType + "&url=" + document.location;
    } else {
        // One-argument form: the first argument is the help ID.
        query = "helpID=" + entityType;
    }
    var newwin = window.open("/aic/Public/BizHelp.jsp?" + query, "newWin_searchout", "top=150,left=150,toolbar=no,location=no,directories=no,status=no,menubar=no,scrollbars=yes,resizable=yes,width=800,height=600");
    // window.open() returns null when the popup is blocked.
    if (newwin) {
        newwin.focus();
    }
}
</script>

<meta http-equiv="Content-Type" content="text/html; charset=GBK">
<script language=javascript>
/**
 * Open the help dialog for the given help id.
 *
 * Uses window.showModelessDialog when the browser provides it, passing the current page
 * URL as the dialog argument. That API is non-standard and missing from current browsers,
 * so the same dialog URL is opened with window.open otherwise.
 * NOTE(review): in the fallback no dialog argument is passed; confirm whether
 * /help/dialog.jsp depends on it.
 *
 * @param helpid Help topic id, appended to the help index URL as "helpid".
 */
function __callHelp(helpid) {
    var contextPath = "/aic";
    var dialogWidth = "1000px";
    var dialogHeight = "700px";
    var pageUrl = document.location.href;
    var helpUrl = contextPath + "/help/index.jsp?helpid=" + helpid;
    var dialogUrl = contextPath + "/help/dialog.jsp?url=" + helpUrl;
    var sFea = "dialogHeight:" + dialogHeight + ";dialogWidth:" + dialogWidth + ";center:yes;help:no;resizable:yes;status:no";
    if (window.showModelessDialog) {
        window.showModelessDialog(dialogUrl, pageUrl, sFea);
        return;
    }
    var helpWindow = window.open(dialogUrl, "_blank", "width=1000,height=700,resizable=yes,status=no");
    // window.open() returns null when the popup is blocked.
    if (helpWindow) {
        helpWindow.focus();
    }
}
</script>
<!-- Toolbar strip: spacer cells followed by the "操作帮助" link, which calls __callHelp. -->
<table width="840" border="0" cellspacing="0" cellpadding="0" height="18" align="center">
  <tr>
    <td width="60%" height="18" align="right">
      <table class="main_table2" border="0" cellspacing="0" cellpadding="0" align="right">
        <tr>

          <td align="center">&nbsp;</td>
          <td align="center" width="18">&nbsp;</td>

          <td align="center" width="18" colspan="7">&nbsp;</td>

          <!-- The icon is decorative (the link text already names the action), hence the empty alt. -->
          <td align="center"><a href="javascript:__callHelp('jdqydjxxcx')"><font color="black"><b>操作帮助</b></font>&nbsp;
          <img src="/aic/help/img/czhelp.gif" alt="" width="25" height="25" border="0" align="absmiddle"></a></td>

        </tr>
      </table>
    </td>
  </tr>
</table>
<!-- Divider: two stacked 1px-high rows, one with a background image and one with a solid colour. -->
<table width="840" border="0" cellspacing="0" cellpadding="0" align="center">
  <tr>
    <td background="/aic/images/topbar-bg2.gif" height="1"></td>
  </tr>
  <tr>
    <td bgcolor="#5B6371" height="1"></td>
  </tr>
</table>


<form name=form1 method="post" onsubmit="return checkform1()" action="RegisterSearchResult.jsp">
<table class="main_table" width="840" border="1" cellspacing="0" align="center" bordercolor="#A5ABB6"
bordercolordark="#ffffff">
<!-- Column headings of the result list. Linked headings request SearchControl with an orderBy parameter for that column. -->
<tr class="main_datalist_thead_sub" align="center">
  <td width="4%" align="center">序号</td>
  <td width="9%" align="center"><a href="/aic/SearchControl?action=enterpriseReg&isSimpleSearch=true&orderBy=a.registerNo">注册号 /<br>统一社会信用代码</a></td>
  <td width="18%" align="center"><a href="/aic/SearchControl?action=enterpriseReg&isSimpleSearch=true&orderBy=a.corpName">名&nbsp;&nbsp;称</a></td>
  <td width="5%" align="center">字号</td>
  <td width="21%" align="center">地址 /住所 /经营场所</td>
  <td width="7%" align="center"><a href="/aic/SearchControl?action=enterpriseReg&isSimpleSearch=true&orderBy=a.name">法定代表人 /<br>负责人 /合伙人</a></td>
  <td width="10%" align="center"><a href="/aic/SearchControl?action=enterpriseReg&isSimpleSearch=true&orderBy=a.entityTypeID">主体类型</a></td>
  <td width="10%" align="center">管辖单位</td>
  <td width="8%" align="center">联系电话</td>
  <td width="10%" align="center"><a href="/aic/SearchControl?action=enterpriseReg&isSimpleSearch=true&orderBy=a.enterpriseStatusID">状 态</a></td>
</tr>

<!-- Result row 1. Cells follow the header order: no., registration no. (detail link), name, short name, address, legal representative, entity type, jurisdiction, phone, status. -->
<tr align="center" class="main_datalist_row_b">
<td>&nbsp;1
</td>
<td>&nbsp;<a
href="javascript:call('/aic/SearchCorporationTask?service=colligateSearchInterface&sFromSupervision=fromSupSearch&isUseDataCentre=true&entityNo=186579c3-010b-1000-e001-06a80a0c0115','天派包装制品有限公司')">91441900787974676B
</a></td>
<td>&nbsp;天派包装制品有限公司
</td>
<td>&nbsp;天派
</td>
<td>&nbsp;立新科技工业园内 B 区第 x 座
</td>
<td>&nbsp;小兵
</td>
<td>&nbsp;有限责任公司
</td>
<td>&nbsp;东城分局
</td>

<td>&nbsp;86130686
</td>

<td>&nbsp;登记成立
</td>
</tr>


<!-- Result row 2. Cells follow the header order: no., registration no. (detail link), name, short name (empty), address, legal representative, entity type, jurisdiction, phone, status. -->
<tr align="center" class="main_datalist_row_a">
<td>&nbsp;2
</td>
<td>&nbsp;<a
href="javascript:call('/aic/SearchCorporationTask?service=colligateSearchInterface&sFromSupervision=fromSupSearch&isUseDataCentre=true&entityNo=bbca8b98-014d-1000-e000-446e0a0c0115','天派包装制品有限公司东城分公司')">914419003453789301
</a></td>
<td>&nbsp;天派包装制品有限公司东城分公司
</td>
<td>&nbsp;
</td>
<td>&nbsp;石井工业区
</td>
<td>&nbsp;李中
</td>
<td>&nbsp;内资分公司
</td>
<td>&nbsp;东城分局
</td>

<td>&nbsp;13333333333
</td>

<td>&nbsp;已注销
</td>
</tr>
1894 次点击
所在节点    程序员
8 条回复
ruoxie
2020-10-25 12:11:57 +08:00
基本每个语言都有解析 html 的库,github 搜一搜
ydpro
2020-10-25 13:07:19 +08:00
python selenium 库,很简单的库,稍微看下就能上手
vone
2020-10-25 19:22:03 +08:00
shawndev
2020-10-26 09:38:26 +08:00
keypoint: main_datalist_row_
songdg
2020-10-28 14:02:30 +08:00
@ruoxie 谢谢,我用的是 autohotkey,估计没有这样的库。
songdg
2020-10-28 14:03:17 +08:00
@ydpro 谢谢,可惜使用的是 autohotkey
songdg
2020-10-28 14:03:41 +08:00
@vone 谢谢,可惜使用的是 autohotkey
songdg
2020-10-28 14:04:10 +08:00
@shawndev 非常感谢,问题解决了。

这是一个专为移动设备优化的页面(即为了让你能够在 Google 搜索结果里秒开这个页面),如果你希望参与 V2EX 社区的讨论,你可以继续到 V2EX 上打开本讨论主题的完整版本。

https://www.v2ex.com/t/718334

V2EX 是创意工作者们的社区,是一个分享自己正在做的有趣事物、交流想法,可以遇见新朋友甚至新机会的地方。

V2EX is a community of developers, designers and creative people.

© 2021 V2EX