package ir.webutils;

import java.net.URL;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;

/* loaded from: input_file:ir/webutils/SafeHTMLPageRetriever.class */
/**
 * An {@link HTMLPageRetriever} that consults an exclusion set before
 * downloading a page and refuses links that the set contains.
 *
 * <p>The exclusion set is replaced every time a link belongs to a different
 * site than the previous one, and it is extended with the paths reported by
 * the {@link RobotsMetaTagParser} of each page that is downloaded.
 *
 * <p>The instance keeps mutable state ({@code currentSite} and
 * {@code disallowed}) that is updated on every call without any
 * synchronization.
 */
public final class SafeHTMLPageRetriever extends HTMLPageRetriever {
    /**
     * Exclusion set for {@link #currentSite}. Entries added by this class are
     * URL path strings. Presumably seeded from the site's robots.txt by
     * {@link RobotExclusionSet} -- confirm against that class.
     */
    private Set disallowed = new RobotExclusionSet();

    /** Site (authority without user info) that {@link #disallowed} belongs to; empty when none. */
    private String currentSite = "";

    /**
     * Downloads the page behind {@code link} unless the exclusion set forbids it.
     *
     * @param link the link to retrieve
     * @return a {@link SafeHTMLPage} built from the link, the downloaded text
     *         and the value of {@link RobotsMetaTagParser#index()}
     * @throws PathDisallowedException if the link's URL, or its path, is
     *         contained in the exclusion set
     */
    @Override // ir.webutils.HTMLPageRetriever
    public HTMLPage getHTMLPage(Link link) throws PathDisallowedException {
        URL url = link.getURL();

        // NOTE(review): this tests the URL object itself, and it runs against
        // the set that was current *before* the site-change reload below.
        // Everything this class adds to the set is a path String, so whether
        // this can ever match depends on RobotExclusionSet.contains -- confirm.
        if (this.disallowed.contains(url)) {
            throw new PathDisallowedException("Access disallowed :" + link);
        }

        String site = getSite(url);
        if (!this.currentSite.equals(site)) {
            this.currentSite = site;
            // A URL without an authority has no site to load exclusions for,
            // so fall back to the same default set the fields start out with.
            if (site.length() == 0) {
                this.disallowed = new RobotExclusionSet();
            } else {
                this.disallowed = new RobotExclusionSet(site);
            }
        }

        if (this.disallowed.contains(url.getPath())) {
            throw new PathDisallowedException("Access disallowed: " + link);
        }

        String webPage = WebPage.getWebPage(url);
        RobotsMetaTagParser metaParser = new RobotsMetaTagParser(url, webPage);
        // Remember the paths of the links returned by the meta-tag parser so
        // that later requests for them are rejected by the check above.
        this.disallowed.addAll(getPaths(metaParser.parseMetaTags()));
        return new SafeHTMLPage(link, webPage, metaParser.index());
    }

    /**
     * Returns the authority of {@code url} with any leading
     * {@code userinfo@} part removed.
     *
     * @param url the URL to inspect
     * @return the text after the first {@code '@'} of the authority, the whole
     *         authority if it has no {@code '@'}, or the empty string if the
     *         URL has no authority at all
     */
    private String getSite(URL url) {
        String authority = url.getAuthority();
        if (authority == null) {
            // URL.getAuthority() may return null; previously this caused a
            // NullPointerException on the indexOf call below.
            return "";
        }
        int at = authority.indexOf('@');
        return at != -1 ? authority.substring(at + 1) : authority;
    }

    /**
     * Maps a list of {@link Link} objects to the path component of each
     * link's URL, preserving order.
     *
     * @param list the links to convert; every element is cast to {@link Link}
     * @return a new list holding one path string per input link
     */
    private List getPaths(List list) {
        LinkedList paths = new LinkedList();
        for (Object item : list) {
            paths.add(((Link) item).getURL().getPath());
        }
        return paths;
    }
}
