// Source archive: www.pudn.com > htmlpars.zip > CheckLinks.cpp, change:1998-12-30,size:3153b


#include <windows.h>
#include <iostream>
#include <string>
#include "inet.h"
#include "html.h"
 
using namespace std; 
 
 
int main( int argc, char *argv[] ) 
{ 
 
    if ( argc < 2 ) 
    { 
        cout << "syntax: CheckLinks <HTML filename or wildcard>" << endl; 
		return 0; 
    } 
 
    HRESULT hRes; 
    hRes = OleInitialize( NULL ); 
    if( !SUCCEEDED( hRes ) )  
        return 1;      
 
 
    WinInet inet; 
    HTMLParser *php = HTMLParser::Create(); 
    WIN32_FIND_DATA wfd; 
 
	// make sure we got an internet connection 
	if ( !inet.IsConnected() ) 
	{ 
		cerr << "Could not connect to the internet..." << endl; 
		php->Release(); 
		OleUninitialize(); 
		return 2; 
	} 
 
	// make sure we connected to IE4 properly 
	if ( !php->IsConnected() ) 
	{ 
		cerr << "Could not connect to Internet Explorer 4..." << endl; 
		php->Release(); 
		OleUninitialize(); 
		return 3; 
	} 
 
	// cache the directory specified on the command line 
	string strDir; 
	char szDrive[_MAX_DRIVE]; 
	char szDir[_MAX_PATH]; 
 
	_splitpath(argv[1], szDrive, szDir, NULL, NULL ); 
	if ( szDrive[0] || szDir[0] ) 
	{ 
		strDir = szDrive; 
		strDir += szDir; 
	} 
 
 
    HANDLE hFind = FindFirstFile(argv[1], &wfd); 
 
    if ( hFind != INVALID_HANDLE_VALUE ) 
    { 
		string strFile; 
 
        do 
        { 
            if ( !(wfd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) ) 
            { 
                strFile = strDir + wfd.cFileName; 
 
                if ( php->LoadHTMLFile(strFile.c_str()) ) 
                { 
                    cout << php->GetLinkCount() << " links" << endl; 
                    for ( int i = 0; i < php->GetLinkCount(); i++ ) 
                    { 
                        string strURL; 
                        if (php->GetLinkURL(i, strURL) ) 
                        { 
                            if ( !inet.CheckLink(strURL.c_str()) ) 
                                cout << "DEAD LINK: " << strURL << endl; 
                            else 
                                cout << "OK: " << strURL << endl; 
                        } 
                        else 
                            cout << "ERROR getting link" << endl; 
                    } 
 
                    cout << php->GetImageCount() << " images" << endl; 
                    for ( i = 0; i < php->GetImageCount(); i++ ) 
                    { 
                        string strURL; 
                        if (php->GetImageURL(i, strURL) ) 
                        { 
                            if ( !inet.CheckLink(strURL.c_str()) ) 
                                cout << "DEAD IMG: " << strURL << endl; 
                            else 
                                cout << "OK: " << strURL << endl; 
                        } 
                        else 
                            cout << "ERROR getting image" << endl; 
                    } 
                } 
                else 
                    cout << "Error loading html file " << strFile << endl; 
            } 
 
        } while ( FindNextFile(hFind, &wfd)); 
 
        FindClose(hFind); 
    } 
 
 
	php->Release(); 
 
	OleUninitialize(); 
 
 
    return( 0 );  
 
}