/* Source: www.pudn.com > openwebspiderv0.1a.zip > openwebspider-0.1.c */
/* OpenWebSpiderV0.1a (Work in progress...) * * Coded by Shen139 * shen139 [at] eviltime (dot) com * * Compile with * - Linux: gcc openwebspider-0.1.c -o openwebspider -g -L /usr/local/mysql/lib/ -lmysqlclient -lnsl -lm * - libmysqlclient10-dev (or mysql-dev) needed * - Windows: Microsoft Visual C++ 6.0 * * * FAQ about Robots and Search engine here: http://www.robotstxt.org/wc/faq.html * * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * */ #include#include #include "options.h" #include "getopt.h" #include "socket.h" #include "misc.h" #include "htmlfnct.h" #include "hstlist.h" #include "ndzlist.h" #include "sqlfnct.h" #define AUTHOR Shen139 #define VERSION "0.1a" int usage(char* Error) { printf("Error:\n %s\n",Error); printf("Usage:\nOpenWebSpider Mode Scan Mode Parameters\n"); printf(" OpenWebSpider Mode:\n -i\t[Indicize] Hostname User Password Database start_url\n -I\t[Indicized search] Hostname User Password Database Query\n -r\t[Real Time search] [Key] start_url\n"); printf(" Scan Mode:\n -s\t[Single Host scan mode]\n -R\t[Recursive Host scan mode]\n"); printf(" -m\t[Show mails too]\n -c\t[Create DataBase] hostname username password name_of_database\n"); exit(0); } int main(int argc, char*argv[]) { #ifdef linux int sock; #else SOCKET sock; #endif char packet[MAXPACKETSIZE]; char html[MAXPACKETSIZE]; char 
starturl[MAXURLSIZE], *starturlTmp; char LastHostOK[MAXHOSTSIZE]; char Key[MAXKEYSIZE]; char Hostname[MAXHOSTSIZE]; char username[20]; char password[30]; char dbname[30]; char Query[MAXARGQUERYSIZE]; int c, condition = 1, recvdbytes; struct sHost currentHst; extern int optind; printf("OpenWebSpider(v%s)\n Coded by Shen139\n shen139(at)eviltime(dot)com\n\n",VERSION); if(argc<3) usage("Too few arguments"); memset(starturl,0,MAXURLSIZE); memset(Key,0,MAXKEYSIZE); while ((c = getopt(argc, argv, "IirsRmc")) != -1) switch (c) { case 'I': if(scan_mode!=0xFF) usage("Scan Mode redefinition"); scan_mode=2; if(optind+4>=argc) usage("No enough arguments in real time search"); if(strlen(argv[optind])>MAXHOSTSIZE) usage("Hostname too long"); else strncpy(Hostname,argv[optind],MAXHOSTSIZE); if(strlen(argv[optind+1])>20) usage("Username too long"); else strncpy(username,argv[optind+1],20); if(strlen(argv[optind+2])>30) usage("Password too long"); else strncpy(password,argv[optind+2],30); if(strlen(argv[optind+3])>30) usage("DataBase name too long"); else strncpy(dbname,argv[optind+3],30); if(strlen(argv[optind+4])>MAXARGQUERYSIZE) usage("Query too long"); else strncpy(Query,argv[optind+4],MAXARGQUERYSIZE); break; case 'i': if(scan_mode!=0xFF) //At startup scan_mode==0xFF => uninitialized usage("Scan Mode redefinition"); scan_mode=1; if(optind+4>=argc) usage("No enough arguments in real time search"); if(strlen(argv[optind])>MAXHOSTSIZE) usage("Hostname too long"); else strncpy(Hostname,argv[optind],MAXHOSTSIZE); if(strlen(argv[optind+1])>20) usage("Username too long"); else strncpy(username,argv[optind+1],20); if(strlen(argv[optind+2])>30) usage("Password too long"); else strncpy(password,argv[optind+2],30); if(strlen(argv[optind+3])>30) usage("DataBase name too long"); else strncpy(dbname,argv[optind+3],30); if(strlen(argv[optind+4])>MAXHOSTSIZE) usage("Url too long"); else strncpy(starturl,argv[optind+4],MAXHOSTSIZE); break; case 'r': if(scan_mode!=0xFF) usage("Scan Mode 
redefinition"); if(optind+1>=argc) usage("No enough arguments in real time search"); scan_mode=0; if(strlen(argv[optind])>MAXKEYSIZE) usage("Key too long"); else strncpy(Key,argv[optind],MAXKEYSIZE); if(strlen(argv[optind+1])>MAXHOSTSIZE) usage("Url too long"); else strncpy(starturl,argv[optind+1],MAXHOSTSIZE); break; case 's': starthostonly=1; break; case 'R': starthostonly=0; break; case 'm': listmails=1; break; case 'c': //Create database if(argc>6) usage("Too many arguments to create a database"); else if(argc<6) usage("Too few arguments to create a database"); else { if(sqlCreateDB(argv[optind],argv[optind+1],argv[optind+2],argv[optind+3])==0) fprintf(stderr,"Error creating tables\n"); else printf("Database created\n"); return 1; } break; } if(scan_mode==0xFF) //at this point scan_mode bust be 0 OR 1 usage("Scan mode undefined"); if(scan_mode==2) { printf("Scan Mode: \tIndicized\n"); printf("Key: \t%s\n",Query); printf("Surfing the DB...\n"); return(IndicizedSearch(Hostname,username,password,dbname,Query,0xFF)); } if(strncmp(starturl,"http://",7)!=0) { starturlTmp=malloc(MAXURLSIZE); strncpy(starturlTmp,starturl,strlen(starturl)+1); snprintf(starturl, MAXURLSIZE, "http://%s",starturlTmp); free(starturlTmp); } if(ParseUrl(starturl,¤tHst,NULL)==-1) usage("Wrong start URL"); currentHst.viewed = 1; strncpy(currentHst.Description,starturl,MIN(strlen(starturl),MAXDESCRIPTIONSIZE)); first = lstInit(currentHst); if(starthostonly==1) { strtHst=malloc(sizeof(struct sHost)); memcpy(strtHst,¤tHst,sizeof(struct sHost)); } printf("\nStart Host: \t%s\nStart Page: \t%s\n",currentHst.Host, currentHst.Page); if(scan_mode==0) printf("Key:\t\t\t%s\n",Key); printf("Scan Mode: \t%s\n",(scan_mode==0)?"Real Time Search":"Indicize"); printf("OpenWebSpider Mode:\t%s\n\n",(starthostonly==1)?"Single Host":"Recursive"); printf("Surfing the net...\n"); #ifdef WIN32 if(!StartUpWinsock()) { fprintf(stderr,"WSAStartup() error\n"); return -1; } #endif while(condition) { if(currentHst.type == 4 
&& listmails==1) { currentHst.viewed = 1; printf("Mail Found: %s\n",currentHst.Page); lstFreeAll(first); goto endofwhile; } printf("Current -> http://%s%s",currentHst.Host,currentHst.Page); if(!LoadSocket(&sock,¤tHst,LastHostOK)) { #ifdef linux close(sock); #else closesocket(sock); #endif fprintf(stderr,"\nSocket() error\n"); goto endofwhile; } #ifdef linux if (connect(sock, (struct sockaddr*) &saddr, sizeof(saddr)) == -1) #else if (connect(sock, (LPSOCKADDR) &saddr, sizeof(saddr)) == SOCKET_ERROR) #endif { #ifdef linux close(sock); #else closesocket(sock); #endif goto endofwhile; } strncpy(LastHostOK,currentHst.Host,MAXHOSTSIZE); ForgeHTTPPacket(currentHst,packet); send(sock,packet,strlen(packet),0); recvdbytes=RecvPackets(sock,packet,sizeof(packet)); if(recvdbytes<=1) { printf("\t\t[ERROR]]\n"); goto endofwhile; } if(currentHst.type !=4 && recvdbytes > 1) { if(ParseHTTPRequest(packet,html,MAXPACKETSIZE)==1) printf("\t\t[%i bytes (%i KB) OK]\n", recvdbytes,recvdbytes/1024); else { printf("\t\t[ERROR]\n"); goto endofwhile; } if(currentHst.type == 1) //Looks for urls only in html page LookForUrls(html,currentHst); if(currentHst.type <= 2) //Looks for the key only in plain text files { if(scan_mode==0) { if(LooksForKey(html,Key)==1) printf("Key found\n\n"); } else if(scan_mode==1) Indicize(Hostname,username,password,dbname,html,currentHst); } } else printf("\n"); #ifdef linux close(sock); #else closesocket(sock); #endif endofwhile: if((ReturnFirstUrl(¤tHst))==-1) { fprintf(stderr,"\nBuffer empty\n"); lstFreeAll(first); return 0; } } lstFreeAll(first); return 1; } /*EOF*/