20
2015
04

2015年,软件展示,采集淘宝商品说明,这是一个配套的采集阿里妈妈的网站!

阿里妈妈的采集器是这样:
最近增加了短网址:
从阿里妈妈采集的资料,然后,要补充内容,让搜索引擎收录!

自从没有商品详情的api后,就想到这个办法,不过采集太多,会被淘宝强制登录,

当当,从阿里妈妈采集到的资料,再加上从淘宝采集的内容,感觉是没有什么特色,换来的只有点击。也有想过去采集爱淘宝的链接,但感觉能转换的不大。对于这个功能,原来还是采集了1.6W的资料,

软件界面是这样!


下面是软件的代码!

import win.ui;
import web.form;
import fsys.ini;
import mysql.client;
import inet.whttp;
import web.form.util;




// ---------------------------------------------------------------------------
// Database bootstrap.
// Reads connection settings from the [STR] section of \config.ini, opens the
// MySQL connection, makes sure the configured database exists, and prepares
// the shared state used by the button handlers further down.
// ---------------------------------------------------------------------------
var ini   = fsys.ini("\config.ini");

var dbini = ini.getSection("STR");

// `link` is intentionally a global: every handler in this file queries through it.
link,err = mysql.client(
    server = dbini.dbserver; // database server
    uid = dbini.dbuser;      // user name
    pwd = dbini.dbpass;
);

// Fail early with the real MySQL message instead of crashing on the first
// `link.` access with a meaningless null-member error.
if( !link ){
    error( "MySQL connection failed: " ++ tostring(err), 2 );
}

// Create the database if it does not exist yet and select it.
link.createDb(dbini.dbname);

// Name of the per-project alinfo_* table (set when a project is selected).
var this_tables = "";

// Project domain with "." replaced by "_" (set when a project is selected).
var table_push  = "";

// Name of the per-project alimama_* table (set when a project is selected).
var alimama_plus = "";

// Set the connection character set.
link.query("SET NAMES  gbk ");

// SQL template; "[domain]" is substituted with "_<project>" before execution.
var str_create = /*
create table `alinfo[domain]` select * from alinfo;
*/
;
// Main window definition. Everything between the DSG markers is produced by
// the form designer; edit it through the designer rather than by hand.
// Controls referenced by the code below: btn_start, btn_tongbu,
// btn_tongbu_more, mao_login, tao_login, txt_project (combobox), txt_count,
// txt_info, txt_mama, txt_log (rich edit log) and webarea (browser host).
/*DSG{{*/
winform = ..win.form(text=
"淘宝采集15.01.12";right=971;bottom=487)
winform.add(
btn_start={cls=
"button";text="开始";left=707;top=5;right=766;bottom=65;dr=1;dt=1;font=LOGFONT( name='新宋体';h=-14 );z=3};
btn_tongbu={cls=
"button";text="单步同步";left=766;top=5;right=825;bottom=65;dr=1;dt=1;z=7};
btn_tongbu_more={cls=
"button";text="批量同步";left=825;top=5;right=884;bottom=65;dr=1;dt=1;z=13};
groupbox={cls=
"groupbox";text="选区";left=704;top=74;right=966;bottom=168;dr=1;dt=1;edge=1;z=4};
mao_login={cls=
"button";text="天猫登入";left=884;top=35;right=957;bottom=64;disabled=1;dr=1;dt=1;font=LOGFONT( name='新宋体';h=-14 );z=15};
static={cls=
"static";text="项目列表:";left=722;top=94;right=777;bottom=112;dr=1;dt=1;transparent=1;z=5};
static2={cls=
"static";text="alimama表:";left=708;top=121;right=776;bottom=138;align="right";dr=1;dt=1;transparent=1;z=8};
static3={cls=
"static";text="alinfo表:";left=719;top=144;right=776;bottom=158;align="right";dr=1;dt=1;transparent=1;z=9};
tao_login={cls=
"button";text="淘宝登入";left=884;top=6;right=957;bottom=35;disabled=1;dr=1;dt=1;font=LOGFONT( name='新宋体';h=-14 );z=14};
txt_count={cls=
"edit";text="1";left=912;top=89;right=955;bottom=111;align="right";dr=1;dt=1;edge=1;num=1;z=12};
txt_info={cls=
"edit";left=784;top=140;right=955;bottom=160;dr=1;dt=1;edge=1;z=11};
txt_log={cls=
"richedit";left=705;top=172;right=964;bottom=480;db=1;dr=1;dt=1;edge=1;multiline=1;vscroll=1;wrap=1;z=2};
txt_mama={cls=
"edit";left=784;top=116;right=956;bottom=136;dr=1;dt=1;edge=1;z=10};
txt_project={cls=
"combobox";left=784;top=90;right=901;bottom=110;dr=1;dt=1;edge=1;items={};mode="dropdownlist";z=6};
webarea={cls=
"static";text="static";left=4;top=3;right=698;bottom=481;bgcolor=16777215;db=1;dl=1;dr=1;dt=1;z=1}
)
/*}}*/

// Browser-control tuning: force the IE11 enhanced mode and enable GPU rendering.
web.form.util.emulation(11001);
web.form.util.gpuRendering(true);

// Shared browser control hosted in winform.webarea, created with the
// _UIFLAG_THEME and _DLCTL_NO_SCRIPTS flags; script error dialogs are suppressed.
var wb = web.form( winform.webarea, 0x40000/*_UIFLAG_THEME*/, 0x80/*_DLCTL_NO_SCRIPTS*/ );
wb.noScriptErr = true;

// Shared HTTP client used to download item description payloads.
var http = inet.whttp();


   
// Startup hook, called once after the window is shown: writes the startup
// banner to the log, disables single-step sync and fills the project list.
winform.load = function(){
    print_text("链接数据库成功");
    print_text(timetostr());

    // Single-step sync stays disabled until the project combobox handler enables it.
    winform.btn_tongbu.disabled = true;

    winform.obj_load();
}


// "Tmall login" button: opens the Tmall home page in a freshly created browser
// control hosted in winform.webarea and waits for it to load.
// NOTE(review): this creates a new web.form on every click rather than reusing
// the shared `wb` - presumably to get a control without _DLCTL_NO_SCRIPTS; confirm.
winform.mao_login.oncommand = function(id,event){
    var loginBrowser = web.form( winform.webarea );
    loginBrowser.go("http://www.tmall.com");
    loginBrowser.wait();
}


// "Taobao login" button: opens the Taobao home page in a freshly created browser
// control hosted in winform.webarea and waits for it to load.
// NOTE(review): this creates a new web.form on every click rather than reusing
// the shared `wb` - presumably to get a control without _DLCTL_NO_SCRIPTS; confirm.
winform.tao_login.oncommand = function(id,event){
    var loginBrowser = web.form( winform.webarea );
    loginBrowser.go("http://www.taobao.com");
    loginBrowser.wait();
}

// "Batch sync" button: copies every (tid, tdate) pair that exists in the
// project's alimama_* table but not yet in its alinfo_* table, using a single
// INSERT ... SELECT statement.
winform.btn_tongbu_more.oncommand = function(id,event){

    // Both table names are only filled in once a project has been selected;
    // without them the statement below would be invalid SQL.
    if( this_tables == "" || alimama_plus == "" ){
        print_text("请选择项目,或是添加项目");
        return;
    }

    winform.btn_tongbu_more.disabled = true;

    var sqlstr = /* 
                INSERT INTO [infotable](tid,tdate) 
                SELECT tid,tdate FROM [fromtable] WHERE tid NOT IN (SELECT tid FROM [infotable] )
            */
    ;
    // The "@" prefix makes string.replace treat the pattern as plain text.
    sqlstr = string.replace(sqlstr,"@[infotable]",this_tables);
    sqlstr = string.replace(sqlstr,"@[fromtable]",alimama_plus);

    var ok,err = link.query(sqlstr);

    if( err ){
        // Surface the failure instead of reporting a successful sync.
        print_text(err);
    }
    else {
        print_text(link.getInfo());
        winform.btn_tongbu_more.text = "同步完成";
    }

    win.delay(10);

    print_text("点开始,开始采集");

    winform.btn_tongbu_more.disabled = false;
}

// "Single-step sync" button: walks the project's alimama_* table row by row
// and inserts every tid that is still missing from the alinfo_* table.
// The button caption doubles as the run state: "同步中" while running; clicking
// again switches it to "暂停同步", which makes the running loop stop.
winform.btn_tongbu.oncommand = function(id,event){

    // Index 1 is the "选择项目" placeholder entry.
    if(winform.txt_project.selIndex==1){
        print_text("请选择项目,或是添加项目");
        return;
    }

    if(winform.btn_tongbu.text != "同步中"){
        winform.btn_tongbu.text = "同步中";
    }
    else {
        // A second click while running: request a pause and leave; the loop
        // started by the first click notices the caption change and exits.
        winform.btn_tongbu.text = "暂停同步";
        return;
    }

    var count = 0;
    var listSql   = string.format("SELECT tid,tdate FROM %s",alimama_plus);
    var insertFmt = "INSERT INTO %s(tid,tdate) values(%s,'%s') ";

    var list,err = link.query(listSql);
    if( !list ){
        // Query failed (e.g. source table missing): report and restore the caption.
        print_text(err);
        winform.btn_tongbu.text = "单步同步";
        return;
    }

    var item = list.fetchObject();

    while( item && winform.btn_tongbu.text == "同步中" ){

        count++;

        var checkSql = string.format("select count(0) as ctid FROM `%s` where tid=%s",this_tables,item.tid);
        var ck,ckErr = link.query(checkSql);
        if( !ck ){
            // Cannot check the target table: stop instead of dereferencing null.
            print_text(ckErr);
            winform.btn_tongbu.text = "暂停同步";
            break;
        }

        var ckRow = ck.fetchObject();

        if(ckRow.ctid==0){
            print_text(item.tid+"未同步");

            // Write the missing row into the project table.
            var insertSql = string.format(insertFmt,this_tables,item.tid,item.tdate);
            var ok,insErr = link.query(insertSql);

            if( insErr ){
                print_text(insErr);
            }
            else {
                print_text(item.tid+"已入写数据");
            }
        }
        else {
            print_text(item.tid+"已存在");
        }

        winform.txt_count.text = count;

        item = list.fetchObject();
    }

    win.delay(10);

    // The source rows were exhausted (as opposed to paused or aborted).
    if(!item){
        winform.btn_tongbu.text = "同步完成";
    }

    print_text("点开始,开始采集");
}


// Project combobox handler: when the selection changes, derives the
// per-project table names from the chosen domain, tries to create the
// alinfo_* table, and publishes both table names to the shared state and to
// the txt_info / txt_mama edit boxes.
winform.txt_project.oncommand = function(id,event){

    // Only selection changes are of interest.
    if( event != 0x1/*_CBN_SELCHANGE*/ ){
        return;
    }

    // Index 1 is the "选择项目" placeholder entry.
    if(winform.txt_project.selIndex==1){
        print_text("请选择项目,或是添加项目");
        return;
    }

    winform.btn_tongbu.disabled = false;

    var domain = winform.txt_project.selText;

    // Table-name suffix: the domain with "." replaced by "_"
    // ("@" makes string.replace treat the pattern as plain text).
    table_push = string.replace(domain,"@.","_");
    print_text(table_push);

    // Create the project table from the template.
    var createSql = string.replace(str_create,"@[domain]","_"+table_push);
    var cr,err = link.query(createSql);

    if(err){
        print_text("表alinfo_"+table_push+"已存在");
        // Also log the actual MySQL message: the failure is not necessarily
        // "table already exists".
        print_text(err);
    }
    else {
        print_text("表alinfo_"+table_push+"已创建");
    }

    this_tables = "alinfo_"+table_push;
    print_text(domain+",打开成功!");

    alimama_plus = "alimama_"+table_push;
    print_text("复制数字表"+alimama_plus);

    winform.txt_info.text = this_tables;
    winform.txt_mama.text = alimama_plus;
}


// Extracts the description-payload URL from an item page's HTML.
// Tmall pages expose it as "descUrl", Taobao pages as "apiItemDesc".
var find_desc_url = function(pageHtml,isTmall){
    if( isTmall ){
        return string.match(pageHtml,"descUrl""\:""(.*?)"",");
    }
    return string.match(pageHtml,"apiItemDesc""\:""(.*?)"",");
}

// Pulls the description out of a downloaded payload of the form var desc='...'.
// Returns null when there is no payload or it does not match.
var parse_desc = function(payload){
    if( !payload ){
        return null;
    }
    return string.match(payload,"var desc='(.*)'");
}

// "Start/Stop" button: repeatedly takes one row of the project table whose
// tcontent is still NULL, loads the item page, scrapes attributes, price and
// description, and writes them back. The button caption doubles as the run
// state: the loop runs while the caption is "停止".
winform.btn_start.oncommand = function(id,event){

    // Toggle the run state; a click while running makes the loop below exit.
    if(winform.btn_start.text=="开始"){
        winform.btn_start.text = "停止";
    }
    else {
        winform.btn_start.text = "开始";
    }

    var count = winform.txt_count.text;

    while( winform.btn_start.text=="停止" ){

        var tm1 = time.now();

        var sqlstr = "select  tid from  "+winform.txt_info.text+" where tcontent is null limit 1"

        var result,err = link.query(sqlstr);
        if( !result ){
            // E.g. no project selected, so the table name is empty.
            print_text(err);
            break;
        }

        var row = result.fetchObject();
        if( !row ){
            // Nothing left to scrape.
            winform.btn_start.text = "开始"
            print_line();
            print_text("=========================");
            print_text("表格记录结束!");
            print_text(timetostr());
            break;
        }

        // Short pause so the UI can process pending messages.
        win.delay(10);

        var ltid = row.tid;

        print_text("当前tid:"+ltid);
        print_text("开始采集,"+timetostr());

        // Load the item page: everything below reads it through `wb`.
        wb.go("http://item.taobao.com/item.htm?id="+ltid);
        wb.wait(,5000);

        // Scraped values with their defaults.
        var obj_text1  = "";     // attribute list HTML
        var obj_text2  = "";     // item description
        var obj_yprice = "0";    // original price
        var info_url   = "";
        var Istmall    = false;
        var obj_tbtm   = "淘宝";
        var obj_isview = 1;

        if(string.find(wb.host,'tmall')){
            // Tmall item page.
            print_text("这个是天猫");

            obj_tbtm = "天猫";
            Istmall  = true;

            var price = string.match(wb.body.outerHTML,"defaultItemPrice""\:""(.*?)"",");
            if( price ){
                obj_yprice = "¥" ++ price;
            }
            print_text("原价格"+obj_yprice);

            var attrEle = wb.getEle("J_AttrUL");
            if(attrEle){
                obj_text1 = attrEle.outerHTML;
            }
        }
        else {
            // Taobao item page.
            print_text("这个是淘宝");

            var attrEle = wb.getEle("attributes");
            if(attrEle){
                obj_text1 = attrEle.outerHTML;
            }

            var priceEle = wb.getEle("J_StrPrice");
            if(priceEle){
                obj_yprice = priceEle.outerText;
                print_text("原价格"+obj_yprice);
            }
        }

        info_url = find_desc_url(wb.body.outerHTML,Istmall);

        print_text("下载内容完成");
        win.delay(10);

        // Keep obj_text2 a string even when nothing could be extracted:
        // writing NULL into tcontent would make this row be selected again forever.
        var desc = info_url ? parse_desc( http.down(info_url) ) : null;
        if( desc ){
            obj_text2 = desc;
        }

        // The description may still be the "loading" placeholder; retry up to
        // ten times, refreshing the page once half-way through.
        var loadtimes = 0;

        while(string.find(obj_text2,"描述加载中") && string.len(obj_text2)==5 && winform.btn_start.text=="停止" && loadtimes<10)
        {
            print_text("内容正在加载,程序等3秒");
            win.delay(1000);

            try{
                win.delay(300);

                info_url = find_desc_url(wb.body.outerHTML,Istmall);

                var get_count = 0;
                var get_html  = http.down(info_url);

                print_text("内容网址");
                print_text(get_html);

                // Bounded retry: only while the download is empty.
                while( ( !get_html || get_html=="" ) && get_count<3 ){
                    get_html = http.down(info_url);
                    get_count++;
                }

                var retryDesc = parse_desc(get_html);
                if( retryDesc ){
                    obj_text2 = retryDesc;
                    print_text("内容重设成功");
                    print_text(obj_text2);
                }

                if(loadtimes==5){
                    // Five attempts without fresh content: reload the page.
                    win.delay(1);
                    wb.refresh3();
                    win.delay(1000);
                    wb.refresh3();
                    wb.wait("",10000);
                }
            }
            catch(e){
                print_text(tostring(e));
            }

            loadtimes++;
        }

        if(loadtimes>=10){
            obj_text2 = "描述加载中";
        }

        print_text("采集完成,"+timetostr());

        win.delay(1);

        if(string.find(wb.body.outerText,"此宝贝已下架")){
            print_text("=======此宝贝已下架=======");
            obj_text2  = "此宝贝已下架";
            obj_isview = 0;
        }
        elseif(string.find(wb.location,"noitem")){
            print_text("=======宝贝不存在=======");
            obj_text2  = "宝贝不存在";
            obj_isview = -1;
        }

        print_text("写入表,"+timetostr());

        // Parameterized update using named parameters.
        // NOTE(review): column text2 receives the attribute HTML (@text1) and
        // tcontent receives the description (@text2) - confirm against the schema.
        var ok,execErr = link.exec("UPDATE `"+winform.txt_info.text+"` set 
                                text2=@text1,
                                tcontent=@text2 ,
                                tbtm=@tbtm,
                                yprice=@yprice,
                                isview=@isview 
                                WHERE tid=@tid"
        ,{
            text1  = obj_text1;
            text2  = obj_text2;
            tbtm   = obj_tbtm;
            yprice = obj_yprice;
            isview = obj_isview;
            tid    = ltid;
        } );

        if(execErr){
            print_text(execErr);
        }

        win.delay(10);

        print_text("添加完成:"+timetostr());

        win.delay(300)

        // Drop session state between items.
        // NOTE(review): the spelling "clearSesseion" is kept from the original - verify
        // it matches the name actually exported by the inet library.
        import inet.http;

        inet.clearSesseion();
        inet.clearCookie();

        wb.go("http://www.aixq.com/ip.php");
        wb.wait("");
        win.delay(10);

        var tm3   = time.now();
        var time1 = tonumber(tm3) - tonumber(tm1)
        print_text("这次用时:["+time1+"]秒");

        print_line();
        print_line();

        winform.txt_count.text = count;

        collectgarbage("collect");

        sleep(10);

        count++;
    }

    print_line();
    print_text("===========END===========");
    print_text("用户停止,或是记录集已结束!");

    winform.btn_start.text = "开始";
}







// Appends one line (terminated with CR LF) to the log box, then yields briefly
// so the window can process pending messages. `logstr` may be any value; it is
// converted with tostring().
print_text = function(logstr){
    var line = tostring(logstr) ++ '\r\n';
    winform.txt_log.appendText(line);
    win.delay(1);
}


// Rebuilds the project combobox: a "选择项目" placeholder at index 1 followed
// by one entry per `domain` value found in the soft_config table.
winform.obj_load = function(){

    winform.txt_project.clear();

    // The placeholder goes in first, so it is present even if the query fails.
    winform.txt_project.add("选择项目");
    winform.txt_project.selIndex = 1;

    var result,err = link.query("select * from `soft_config`");
    if( !result ){
        // E.g. the soft_config table is missing: report it instead of
        // crashing on a null result.
        print_text(err);
        return;
    }

    var row = result.fetchObject();
    while(row){
        winform.txt_project.add(row.domain);
        row = result.fetchObject();
    }
}

// Appends a bare line feed to the log box (visual separator).
print_line = function(){
    var lineFeed = '\n';
    winform.txt_log.appendText(lineFeed);
}
 
 
 
/***
*时间函数 
***/

// Returns the current local time formatted as "YYYY-MM-DD HH:MM:SS".
timetostr = function(){
    // Local variable: the original leaked this value as the global `tm`.
    var now = time.now();
    now.format = "%Y-%m-%d %H:%M:%S"

    return tostring(now);
}
 

// Program entry: show the window, run the startup hook (log banner and project
// list), then enter the message loop.
winform.show();
winform.load();
win.loopMessage();



 文章页返回首页易经>>

« 上一篇
« 下一篇

评论列表:

1楼.奇友   2016-10-19 17:00  回复该评论 
要采集阿里数据,懂mysql的,请联系站长!
我顶 (0) 我踩 (0) 举报 (0) 回复
2楼.奇友   2017-11-21 19:51  回复该评论 
神箭手云爬虫平台,操作简单,轻松获取数据,一键发布数据,贼溜~~~
我顶 (0) 我踩 (0) 举报 (0) 回复

发表评论:

 

◎欢迎参与讨论