/*
 * Decompiled with CFR 0.152.
 */
package edu.uci.ics.crawler4j.robotstxt;

import edu.uci.ics.crawler4j.robotstxt.HostDirectives;
import edu.uci.ics.crawler4j.robotstxt.RobotstxtConfig;
import edu.uci.ics.crawler4j.robotstxt.UserAgentDirectives;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Turns the text of a robots.txt file into a {@link HostDirectives} object.
 *
 * <p>The content is processed line by line. Each {@code name: value} line whose name is one of
 * the recognised rules is either a {@code user-agent} line, which contributes to the set of
 * agents for the current group, or a rule line, which is added to the
 * {@link UserAgentDirectives} of the current group.
 */
public class RobotstxtParser {
    private static final Logger logger = LoggerFactory.getLogger(RobotstxtParser.class);

    /** Matches a {@code name:value} line; group 1 is the rule name, group 2 the raw value. */
    private static final Pattern RULE_PATTERN = Pattern.compile("(?i)^([A-Za-z\\-]+):(.*)");

    /**
     * Matches any {@code <...>} span; such spans are deleted from every line before matching.
     * Compiled once here instead of once per line.
     */
    private static final Pattern TAG_PATTERN = Pattern.compile("<[^>]+>");

    /** Lower-case names of the rules that are accepted; any other rule name is logged and skipped. */
    private static final Set<String> VALID_RULES = new HashSet<String>(Arrays.asList("allow", "disallow", "user-agent", "crawl-delay", "host", "sitemap"));

    /**
     * Parses robots.txt content.
     *
     * <p>Consecutive {@code user-agent} lines are collected into one group. The first
     * non-{@code user-agent} rule that follows opens a {@link UserAgentDirectives} for that
     * group; if no {@code user-agent} line has been seen for the group, the agent {@code "*"}
     * is used. A {@code user-agent} line that appears after rules closes the current group
     * and starts a new one.
     *
     * @param content the raw robots.txt text; must not be {@code null}
     * @param config  the configuration handed to the {@link HostDirectives} constructor
     * @return the directives collected from {@code content}; no group is added when the
     *         content contains no rule lines
     */
    public static HostDirectives parse(String content, RobotstxtConfig config) {
        HostDirectives directives = new HostDirectives(config);
        // Splitting on both '\n' and '\r' also discards blank lines (empty tokens are never returned).
        StringTokenizer st = new StringTokenizer(content, "\n\r");
        HashSet<String> userAgents = new HashSet<String>();
        UserAgentDirectives uaDirectives = null;

        while (st.hasMoreTokens()) {
            String line = cleanLine(st.nextToken());
            if (line.isEmpty()) {
                continue;
            }

            Matcher m = RULE_PATTERN.matcher(line);
            if (!m.matches()) {
                logger.debug("Unrecognized line in robots.txt: {}", line);
                continue;
            }

            // Locale.ROOT keeps the lower-casing independent of the JVM default locale
            // (e.g. a Turkish locale would map 'I' to a dotless 'ı' and break the lookup).
            String rule = m.group(1).toLowerCase(Locale.ROOT);
            String value = m.group(2).trim();
            if (!VALID_RULES.contains(rule)) {
                logger.info("Unrecognized rule in robots.txt: {}", rule);
                continue;
            }

            if (rule.equals("user-agent")) {
                if (uaDirectives != null) {
                    // Rules were already recorded for the previous agents: close that group
                    // and start collecting agents for a new one in a fresh set.
                    directives.addDirectives(uaDirectives);
                    uaDirectives = null;
                    userAgents = new HashSet<String>();
                }
                userAgents.add(value.toLowerCase(Locale.ROOT));
                continue;
            }

            if (uaDirectives == null) {
                if (userAgents.isEmpty()) {
                    // A rule that is not preceded by any user-agent line applies to every agent.
                    userAgents.add("*");
                }
                uaDirectives = new UserAgentDirectives(userAgents);
            }
            uaDirectives.add(rule, value);
        }

        // Without any rule line (empty content, comments only, or only user-agent lines)
        // no group was ever opened, so there is nothing to add.
        if (uaDirectives != null) {
            directives.addDirectives(uaDirectives);
        }
        return directives;
    }

    /** Drops everything from the first '#' onward, deletes {@code <...>} spans, and trims the result. */
    private static String cleanLine(String line) {
        int commentIndex = line.indexOf('#');
        if (commentIndex > -1) {
            line = line.substring(0, commentIndex);
        }
        return TAG_PATTERN.matcher(line).replaceAll("").trim();
    }
}

