package ir.webutils;

import ir.utilities.MoreMath;
import ir.utilities.MoreString;
import java.io.File;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;

/* loaded from: input_file:ir/webutils/Spider.class */
/**
 * A simple single-threaded web crawler.
 *
 * <p>Links are taken from the front of {@link #linksToVisit} and newly discovered
 * links are appended to its end. Each page that may be indexed is handed to
 * {@link #processPage(HTMLPage)}. Crawling stops when the queue is empty or
 * {@link #maxCount} pages have been indexed.
 *
 * <p>Recognised command line options (see {@link #processArgs(String[])}):
 * <ul>
 *   <li>{@code -safe} &ndash; use a {@code SafeHTMLPageRetriever}</li>
 *   <li>{@code -d <dir>} &ndash; directory passed to {@code HTMLPage.writeAbsolute}</li>
 *   <li>{@code -c <n>} &ndash; maximum number of pages to index</li>
 *   <li>{@code -u <url>} &ndash; link to put on the queue</li>
 *   <li>{@code -slow} &ndash; pause before every fetch</li>
 * </ul>
 */
public class Spider {
    /** Directory handed to {@code HTMLPage.writeAbsolute}; set by {@code -d}, otherwise {@code null}. */
    protected File saveDir;
    /** Links that have already been taken off the queue; created at the start of {@link #doCrawl()}. */
    protected HashSet visited;
    /** Queue of links still to be tried; consumed from the front, extended at the end. */
    protected List linksToVisit = new LinkedList();
    /** When {@code true}, wait up to one second before each fetch; set by {@code -slow}. */
    protected boolean slow = false;
    /** Retriever used to download pages; replaced by {@code -safe}. */
    protected HTMLPageRetriever webpr = new HTMLPageRetriever();
    /** Number of pages indexed so far. */
    protected int count = 0;
    /** Maximum number of pages to index; set by {@code -c}. */
    protected int maxCount = 10000;

    /**
     * Parses the command line and then runs the crawl.
     *
     * @param strArr command line arguments
     */
    public void go(String[] strArr) {
        processArgs(strArr);
        doCrawl();
    }

    /**
     * Dispatches each recognised option to its {@code handle...CommandLineOption} method.
     * Unrecognised, empty and {@code null} arguments are ignored.
     *
     * @param strArr command line arguments
     * @throws IllegalArgumentException if {@code -d}, {@code -c} or {@code -u} is the last
     *         argument and therefore has no value
     */
    public void processArgs(String[] strArr) {
        for (int i = 0; i < strArr.length; i++) {
            String option = strArr[i];
            // Constant-first equals() makes empty or null arguments fall through harmlessly.
            if ("-safe".equals(option)) {
                handleSafeCommandLineOption();
            } else if ("-d".equals(option)) {
                handleDCommandLineOption(optionValue(strArr, ++i));
            } else if ("-c".equals(option)) {
                handleCCommandLineOption(optionValue(strArr, ++i));
            } else if ("-u".equals(option)) {
                handleUCommandLineOption(optionValue(strArr, ++i));
            } else if ("-slow".equals(option)) {
                handleSlowCommandLineOption();
            }
        }
    }

    /** Returns the value following an option, or fails with a message naming that option. */
    private static String optionValue(String[] args, int valueIndex) {
        if (valueIndex >= args.length) {
            throw new IllegalArgumentException("Missing value for option " + args[valueIndex - 1]);
        }
        return args[valueIndex];
    }

    /** Handles {@code -safe}: switches {@link #webpr} to a {@code SafeHTMLPageRetriever}. */
    protected void handleSafeCommandLineOption() {
        this.webpr = new SafeHTMLPageRetriever();
    }

    /**
     * Handles {@code -d}: sets {@link #saveDir}.
     *
     * @param str path of the directory
     */
    protected void handleDCommandLineOption(String str) {
        this.saveDir = new File(str);
    }

    /**
     * Handles {@code -c}: sets {@link #maxCount}.
     *
     * @param str decimal integer
     * @throws NumberFormatException if {@code str} is not a parsable integer
     */
    protected void handleCCommandLineOption(String str) {
        this.maxCount = Integer.parseInt(str);
    }

    /**
     * Handles {@code -u}: appends a new {@code Link} built from {@code str} to
     * {@link #linksToVisit}.
     *
     * @param str text passed to the {@code Link} constructor
     */
    public void handleUCommandLineOption(String str) {
        this.linksToVisit.add(new Link(str));
    }

    /** Handles {@code -slow}: enables the pause before each fetch. */
    protected void handleSlowCommandLineOption() {
        this.slow = true;
    }

    /**
     * Runs the crawl loop until the queue is empty or {@link #maxCount} pages have been
     * indexed.
     *
     * <p>For each link: skip it if it was already visited or does not look like an HTML
     * page (see {@link #linkToHTMLPage(Link)}); otherwise fetch it, index it if the page
     * allows indexing, and queue the links returned by {@link #getNewLinks(HTMLPage)}.
     * Links are still queued from pages that do not allow indexing.
     *
     * <p>If the thread is interrupted during the {@code -slow} pause, the interrupt flag
     * is restored and the crawl stops.
     */
    public void doCrawl() {
        this.visited = new HashSet();
        while (!this.linksToVisit.isEmpty() && this.count < this.maxCount) {
            if (this.slow && !pauseBeforeFetch()) {
                System.out.println("Crawl interrupted");
                return;
            }
            Link link = (Link) this.linksToVisit.remove(0);
            System.out.println("Trying: " + link);
            if (!this.visited.add(link)) {
                System.out.println("Already visited");
            } else if (linkToHTMLPage(link)) {
                try {
                    HTMLPage hTMLPage = this.webpr.getHTMLPage(link);
                    if (hTMLPage.empty()) {
                        System.out.println("No Page Found");
                    } else {
                        if (hTMLPage.indexAllowed()) {
                            this.count++;
                            System.out.println("Indexing(" + this.count + "): " + link);
                            processPage(hTMLPage);
                        }
                        // No point collecting more links once the quota has been reached.
                        if (this.count < this.maxCount) {
                            this.linksToVisit.addAll(getNewLinks(hTMLPage));
                        }
                    }
                } catch (PathDisallowedException e2) {
                    System.out.println("Robots excluded from access to " + link + ": " + e2);
                }
            } else {
                System.out.println("Not HTML Page");
            }
        }
    }

    /**
     * Waits on this object's monitor for up to one second.
     *
     * @return {@code true} if the wait ended normally, {@code false} if the thread was
     *         interrupted (the interrupt flag is set again before returning)
     */
    private boolean pauseBeforeFetch() {
        synchronized (this) {
            try {
                wait(1000L);
                return true;
            } catch (InterruptedException e) {
                // Keep the interrupt visible to whoever owns this thread.
                Thread.currentThread().interrupt();
                return false;
            }
        }
    }

    /**
     * Decides from the file extension of the link's URL path whether it should be fetched.
     *
     * @param link link to examine
     * @return {@code true} if the extension is empty, {@code html}, {@code htm} or
     *         {@code shtml} (case-insensitive)
     */
    protected boolean linkToHTMLPage(Link link) {
        String fileExtension = MoreString.fileExtension(link.getURL().getPath());
        return fileExtension.equals("")
                || fileExtension.equalsIgnoreCase("html")
                || fileExtension.equalsIgnoreCase("htm")
                || fileExtension.equalsIgnoreCase("shtml");
    }

    /**
     * Returns the links to add to the queue for a fetched page. This implementation
     * returns the page's out-links unchanged.
     *
     * @param hTMLPage page that was just fetched
     * @return links to append to {@link #linksToVisit}
     */
    protected List getNewLinks(HTMLPage hTMLPage) {
        return hTMLPage.getOutLinks();
    }

    /**
     * Processes a page that is being indexed: runs a {@code LinkExtractor} over it and
     * writes it via {@code HTMLPage.writeAbsolute} to {@link #saveDir} under the name
     * {@code "P"} followed by the current {@link #count} passed through
     * {@code MoreString.padWithZeros} with a width equal to the number of decimal digits
     * in {@link #maxCount}.
     *
     * @param hTMLPage page to process
     */
    protected void processPage(HTMLPage hTMLPage) {
        new LinkExtractor(hTMLPage).extractLinks();
        hTMLPage.writeAbsolute(
                this.saveDir,
                "P" + MoreString.padWithZeros(this.count, decimalDigits(this.maxCount)));
    }

    /**
     * Number of decimal digits in {@code value}, treating anything below 1 as 1.
     * Computed without logarithms so the result cannot be thrown off by floating-point
     * rounding at exact powers of ten.
     */
    private static int decimalDigits(int value) {
        return String.valueOf(Math.max(value, 1)).length();
    }

    /**
     * Command line entry point: creates a {@code Spider} and runs it.
     *
     * @param strArr command line arguments
     */
    public static void main(String[] strArr) {
        new Spider().go(strArr);
    }
}
