/* www.pudn.com > openwebspiderv0.1a.zip > misc.h */
/* OpenWebSpider
*
* Coded by Shen139
* shen139 [at] eviltime (dot) com
*
*
* This file is part of OpenWebSpider
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
*/
#ifndef __MISC
#define __MISC
#include <string.h>

#include "strfnct.h"
#include "htmlfnct.h"
#include "ndzlist.h"
#include "options.h"
#include "sqlfnct.h"
#include "snprintf.c"
/* ForgeHTTPPacket
 * hst -> packet <-
 * Writes a minimal HTTP/1.1 GET request for hst.Page on hst.Host into packet.
 * hst.Page = "/prova.htm" ==> packet = "GET /prova.htm HTTP/1.1\r\nHost: ...\r\n\r\n"
 *
 * packet must have room for MAXPACKETSIZE bytes (that is the size handed
 * to snprintf).
 *
 * Returns 1 when the whole request fits in packet.
 * Returns 0 when packet is NULL or the request had to be truncated; in the
 * truncation case packet still holds the cut-off text, so it must not be sent.
 */
int ForgeHTTPPacket(struct sHost hst,char * packet)
{
    int written;

    if (packet == NULL)
        return 0;

    written = snprintf(packet, MAXPACKETSIZE,
                       "GET %s HTTP/1.1\r\nHost: %s\r\n\r\n",
                       hst.Page, hst.Host);

    /* A negative value is an output error; a value >= the buffer size means
     * the request did not fit and lost (at least) its terminating blank line. */
    if (written < 0 || written >= MAXPACKETSIZE)
        return 0;

    return 1;
}
/* ParseHTTPRequest
 * recvdpkt -> htmlOut <- maxout ->
 * Copies the received packet, without its HTTP header, into htmlOut.
 *
 * The packet is accepted only if its first 15 characters match
 * "HTTP/1.1 200 OK" or "HTTP/1.1 302 Fo" (start of "HTTP/1.1 302 Found"),
 * ignoring case. The body is everything after the first empty line
 * ("\r\n\r\n").
 *
 * At most MIN(maxout,MAXPACKETSIZE)-1 body bytes are copied; htmlOut is
 * always NUL-terminated when 1 is returned, and the unused remainder of
 * that window is zero-filled by strncpy.
 *
 * Returns 1 if a body was copied.
 * Returns 0 if an argument is NULL, maxout is not positive, the status line
 * is not accepted, or no header/body separator is present.
 */
int ParseHTTPRequest(char* recvdpkt,char* htmlOut,int maxout)
{
    const char *body;
    int limit;

    if (recvdpkt == NULL || htmlOut == NULL || maxout <= 0)
        return 0;

#ifdef WIN32
    if(strnicmp(recvdpkt,"HTTP/1.1 200 OK",15)!=0 && strnicmp(recvdpkt,"HTTP/1.1 302 Found",15)!=0)
#else
    if(strncasecmp(recvdpkt,"HTTP/1.1 200 OK",15)!=0 && strncasecmp(recvdpkt,"HTTP/1.1 302 Found",15)!=0)
#endif
        return 0;

    /* The first blank line ends the header. */
    body = strstr(recvdpkt, "\r\n\r\n");
    if (body == NULL)
        return 0;
    body += 4;

    /* Never copy more than the caller's buffer or one packet, whichever is smaller. */
    limit = maxout;
    if (limit > MAXPACKETSIZE)
        limit = MAXPACKETSIZE;

    /* strncpy does not terminate when the source is too long, so reserve
     * the last byte for the terminator and write it explicitly. */
    strncpy(htmlOut, body, (size_t)limit - 1);
    htmlOut[limit - 1] = '\0';

    return 1;
}
/* UnHtml
 * html -> text <- maxout ->
 * Copies html into text, dropping every '<', every '>' and everything
 * between a '<' and the next '>'.
 * html = "<b>TesT123</b><br>"
 * text => "TesT123"
 *
 * text must have room for maxout bytes. It is zero-filled first and is
 * always NUL-terminated: at most maxout-1 characters are written.
 * Only the first MIN(strlen(html),maxout) characters of html are scanned.
 *
 * Returns the number of characters written to text (0 if text is NULL,
 * html is NULL or maxout is not positive).
 *
 * NOTE(review): the loop header and the '<' test were missing from the copy
 * of this file under review and have been reconstructed from the remaining
 * '>' handling; confirm against the upstream source.
 */
int UnHtml(char* html, char* text,int maxout)
{
    int i, m, len, x = 0, pOpen = 0;

    if (text == NULL || maxout <= 0)
        return 0;

    memset(text, 0, maxout);

    if (html == NULL)
        return 0;

    len = (int)strlen(html);
    m = (len < maxout) ? len : maxout;

    /* Stop one byte short of maxout so the zero written by memset
     * remains as the string terminator. */
    for (i = 0; i < m && x < maxout - 1; i++) {
        if (html[i] == '<')
            pOpen = 1;              /* entering a tag */
        else if (html[i] == '>')
            pOpen = 0;              /* leaving a tag; a stray '>' is dropped too */
        else if (pOpen == 0)
            text[x++] = html[i];    /* plain text outside any tag */
    }

    return x;
}
int Split(char* text,char* Tokens, NODE* first,int MaxWordSize)
{
int i;
char tmp[2];
int pWord=0;
char* word=malloc(MaxWordSize);
char* UpWord=malloc(MaxWordSize);
tmp[1]=0;
for(i=0;i<(signed)strlen(text);i++)
{
tmp[0]=text[i];
if(bTokenIn(tmp,Tokens,1)==1)
{
if(pWord1)
{
atoupper(word,UpWord,strlen(word));
if(ndzLookForWord(first,UpWord)==NULL) //Add unique word
lstAddWord(first,UpWord);
}
}
pWord=0;
}
else
if(pWord0)
{
snprintf(title,MAXDESCRIPTIONSIZE,"%s - %s",tmpTitle,host.Description);
usetitle=1;
}
UnHtml(html,cTmp,MAXPACKETSIZE);
OnlyOneSpace(cTmp,pureText,MAXPACKETSIZE);
free(cTmp);
ndzFirst=ndzInit();
Split(pureText," !?'^\"()[]{}+-=,;.:_<>\n\r\t",ndzFirst,MAXWORDSIZE);
lstGetLastNode(ndzFirst,&numofword);
if(sqlConnect(hostname, username ,password , dbname , &mysql)==0)
{
fprintf(stderr, "Failed to connect to database: Error: %s\n",mysql_error(&mysql));
lstFreeAll(ndzFirst);
return 0;
}
if(GetUrlIDfromHostPage(&mysql, host.Host, host.Page, urlID)==0)
{
//Host is not in db! adding it
snprintf(sqlQuery,MAXQUERYSIZE,"INSERT INTO urllist (hostname,page,description) VALUES(\"%s\",\"%s\",\"%s\");",host.Host,host.Page,(usetitle==1) ? title: host.Description);
sqlSendSqlQuery(&mysql, sqlQuery, &result);
if(mysql_affected_rows(&mysql)==1)
GetUrlIDfromHostPage(&mysql, host.Host, host.Page, urlID);
else
return 0;
}
else
{
//delete all entries and reinsert new words
snprintf(sqlQuery,MAXQUERYSIZE,"DELETE FROM urllist WHERE urlID = \"%s\" ;",urlID);
sqlSendSqlQuery(&mysql, sqlQuery, &result);
snprintf(sqlQuery,MAXQUERYSIZE,"DELETE FROM mid WHERE urlID = \"%s\" ;",urlID);
sqlSendSqlQuery(&mysql, sqlQuery, &result);
snprintf(sqlQuery,MAXQUERYSIZE,"INSERT INTO urllist (hostname,page,description) VALUES(\"%s\",\"%s\",\"%s\");",host.Host,host.Page,(usetitle==1) ? title: host.Description);
sqlSendSqlQuery(&mysql, sqlQuery, &result);
if(mysql_affected_rows(&mysql)==1)
GetUrlIDfromHostPage(&mysql, host.Host, host.Page, urlID);
else
return 0;
}
for(i=0;ifield != NULL)
{
//is the word var in db?
snprintf(sqlQuery,MAXQUERYSIZE,"SELECT * FROM wordlist where word=\"%s\";",(char*)tmpNode->field);
if(sqlSendSqlQuery(&mysql, sqlQuery, &result)==1);
{
if((row=mysql_fetch_row(&result))==NULL)
{
//New word! Add it
snprintf(sqlQuery,MAXQUERYSIZE,"INSERT INTO wordlist (word) VALUES(\"%s\");",(char*)tmpNode->field);
sqlSendSqlQuery(&mysql, sqlQuery, &result);
if(mysql_affected_rows(&mysql)!=1)
break;
}
//Links wordID with urlID
//Get wordID
snprintf(sqlQuery,MAXQUERYSIZE,"SELECT wordID FROM wordlist WHERE word=\"%s\";",(char*)tmpNode->field);
sqlSendSqlQuery(&mysql, sqlQuery, &result);
if((row=mysql_fetch_row(&result))==NULL)
break;
else
{
//wordID OK urlID OK! adding them in mid
snprintf(sqlQuery,MAXQUERYSIZE,"INSERT INTO mid (wordID, urlID) VALUES (\"%s\",\"%i\");",row[0],atoi(urlID));
sqlSendSqlQuery(&mysql, sqlQuery, &result);
if(mysql_affected_rows(&mysql)!=1)
break;
}
}
}
}
free(pureText);
lstFreeAll(ndzFirst);
return 1;
}
int IndicizedSearch(char* hostname,char* username,char* password,char* dbname,char* Query, int havingcount)
{
char sqlQuery[MAXQUERYSIZE];
char sngKey[MAXKEYSIZE];
int iNumofkeys=0;
char sNumofkeys[3];
int i,c;
int NumOfResults=0;
MYSQL mysql;
MYSQL_RES result,result2;
MYSQL_ROW row,row2;
if(sqlConnect(hostname, username, password, dbname , &mysql)==0)
{
#ifdef CGI
printf("Failed to connect to database: %s