/*
 * Decompiled with CFR 0.152.
 */
package edu.uci.ics.crawler4j.parser;

import edu.uci.ics.crawler4j.parser.ParseData;
import edu.uci.ics.crawler4j.url.WebURL;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.PrintStream;
import java.util.HashSet;
import java.util.Set;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.sax.SAXTransformerFactory;
import javax.xml.transform.sax.TransformerHandler;
import javax.xml.transform.stream.StreamResult;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

/**
 * {@link ParseData} implementation for binary (non-text) content.
 *
 * <p>Raw bytes handed to {@link #setBinaryContent(byte[])} are parsed with Tika's
 * {@link AutoDetectParser}; the SAX events it emits are serialized as HTML text,
 * which is then available through {@link #getHtml()}.
 */
public class BinaryParseData
implements ParseData {
    private static final Logger logger = LoggerFactory.getLogger(BinaryParseData.class);

    /** Character encoding used both for serializing and for decoding the generated markup. */
    private static final String DEFAULT_ENCODING = "UTF-8";

    /** Value of the transformer's "method" output property. */
    private static final String DEFAULT_OUTPUT_FORMAT = "html";

    /** Parser shared by all instances of this class. */
    private static final Parser AUTO_DETECT_PARSER = new AutoDetectParser();

    // NOTE(review): this factory is shared by every instance. JAXP does not guarantee that
    // TransformerFactory implementations are thread-safe - confirm how callers use this
    // class from multiple threads before relying on concurrent setBinaryContent() calls.
    private static final SAXTransformerFactory SAX_TRANSFORMER_FACTORY = (SAXTransformerFactory)TransformerFactory.newInstance();

    /** Per-instance Tika parse context; the shared parser is registered in it by the constructor. */
    private final ParseContext context = new ParseContext();

    /** Outgoing links associated with this content; starts out as an empty set. */
    private Set<WebURL> outgoingUrls = new HashSet<WebURL>();

    /** Serialized markup of the last successful parse, or {@code null} if none has happened yet. */
    private String html = null;

    /**
     * Creates an empty instance and registers the shared parser in the parse context
     * under the {@link Parser} key.
     */
    public BinaryParseData() {
        // No cast on the value: ParseContext.set is generic (<T> set(Class<T>, T)), so the
        // value must be typed as Parser to match the Parser.class key.
        this.context.set(Parser.class, AUTO_DETECT_PARSER);
    }

    /**
     * Parses the given binary content and stores the resulting markup.
     *
     * <p>The stored markup is only replaced when parsing completes; if any exception is
     * thrown, the previously stored value is left untouched.
     *
     * @param data raw bytes of the document; must not be {@code null}
     * @throws TransformerConfigurationException if the serializing handler cannot be created
     * @throws TikaException if Tika fails to parse the content
     * @throws SAXException if SAX event processing fails
     * @throws IOException if reading the content or decoding the output fails
     */
    public void setBinaryContent(byte[] data) throws TransformerConfigurationException, TikaException, SAXException, IOException {
        InputStream inputStream = new ByteArrayInputStream(data);
        ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
        TransformerHandler handler = BinaryParseData.getTransformerHandler(outputStream, DEFAULT_OUTPUT_FORMAT, DEFAULT_ENCODING);
        AUTO_DETECT_PARSER.parse(inputStream, handler, new Metadata(), this.context);

        // Decode straight from the buffer (avoids the extra copy made by toByteArray()).
        String markup = outputStream.toString(DEFAULT_ENCODING);

        // Remove every occurrence of the XHTML namespace URI from the serialized output
        // (presumably the namespace declaration added during parsing - verify against
        // the Tika version in use).
        this.html = markup.replace("http://www.w3.org/1999/xhtml", "");
    }

    /**
     * Builds a SAX handler that serializes the events it receives to {@code out}.
     *
     * @param out      stream receiving the serialized bytes
     * @param method   value for the transformer's "method" output property
     * @param encoding value for the "encoding" output property; left unset when {@code null}
     * @return a handler whose result is bound to {@code out}
     * @throws TransformerConfigurationException if the factory cannot create the handler
     */
    private static TransformerHandler getTransformerHandler(OutputStream out, String method, String encoding) throws TransformerConfigurationException {
        TransformerHandler transformerHandler = SAX_TRANSFORMER_FACTORY.newTransformerHandler();
        Transformer transformer = transformerHandler.getTransformer();
        transformer.setOutputProperty("method", method);
        transformer.setOutputProperty("indent", "yes");
        if (encoding != null) {
            transformer.setOutputProperty("encoding", encoding);
        }
        // Write to the stream directly: a PrintStream wrapper adds nothing here and
        // never throws IOException, which would hide write failures.
        transformerHandler.setResult(new StreamResult(out));
        return transformerHandler;
    }

    /**
     * Returns the stored markup.
     *
     * @return the markup, or {@code null} if nothing has been parsed or set yet
     */
    public String getHtml() {
        return this.html;
    }

    /**
     * Replaces the stored markup.
     *
     * @param html the new markup; stored as given
     */
    public void setHtml(String html) {
        this.html = html;
    }

    /**
     * Returns the set of outgoing links currently held by this object.
     *
     * @return the internal set itself, not a copy
     */
    @Override
    public Set<WebURL> getOutgoingUrls() {
        return this.outgoingUrls;
    }

    /**
     * Replaces the set of outgoing links.
     *
     * @param outgoingUrls the new set; the reference is stored as given
     */
    @Override
    public void setOutgoingUrls(Set<WebURL> outgoingUrls) {
        this.outgoingUrls = outgoingUrls;
    }

    /**
     * Returns the stored markup, or a placeholder message when it is {@code null} or empty.
     */
    @Override
    public String toString() {
        return this.html == null || this.html.isEmpty() ? "No data parsed yet" : this.html;
    }
}

