bashgid/android/src/fishrungames/bashgid/core/HtmlDownloadManager.java

367 lines
9.1 KiB
Java

package fishrungames.bashgid.core;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Iterator;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.xmlpull.v1.XmlPullParserException;
import android.os.Bundle;
import fishrungames.networkutils.ContextHolder;
import fishrungames.networkutils.DownloadFunctions;
import fishrungames.networkutils.FileFunctions;
import fishrungames.networkutils.ImageManager;
import fishrungames.networkutils.SimplePropertyTree;
import fishrungames.networkutils.UniversalActionWithCallback;
import fishrungames.networkutils.UpdateCallbackHolder;
import fishrungames.networkutils.interfaces.NetworkActionInterface;
import fishrungames.networkutils.interfaces.RemoveCallbackInterface;
import fishrungames.networkutils.interfaces.UpdateAndFinishCallbackInterface;
/**
 * Downloads the HTML pages listed in the bundled {@code urllist.xml} asset,
 * reduces each page to a single "main" element, writes the cleaned HTML to the
 * record's local file and builds a flat list of text / image / button parts
 * for every page.
 *
 * The most recent result is kept in memory and can be read through
 * {@link #getGlobalHtmlList()}.
 */
public class HtmlDownloadManager implements NetworkActionInterface
{
    /**
     * Description of one page to download plus the content extracted from it.
     */
    public static class TextFileRecord
    {
        /** Address the page is fetched from. */
        public String url;
        /** Prefix put in front of image links that are not absolute. */
        public String baseUrl;
        /** Target passed to the file writer for the cleaned HTML; also the value of the trailing button part. */
        public String localUrl;

        /**
         * One piece of extracted page content.
         */
        public static class DocPart
        {
            /** Kind of content held in {@link DocPart#value}. */
            public enum DocPartType
            {
                /** {@code value} is plain text (the page title or the text of a child element). */
                DPT_TEXT,
                /** {@code value} is an image link. */
                DPT_IMAGE,
                /** {@code value} is the record's {@code localUrl}. */
                DPT_BUTTON
            }

            public DocPartType docPartType;
            public String value;

            public DocPart(DocPartType docPartType, String value)
            {
                this.docPartType = docPartType;
                this.value = value;
            }
        }

        /** Parts extracted from the page, in document order; stays empty until the page has been processed. */
        public ArrayList<DocPart> contentArr = new ArrayList<DocPart>();

        /** How {@link #separatorValue} is interpreted when locating the main element. */
        public enum SeparatorType
        {
            /** {@code separatorValue} is an element id. */
            ST_DIV_ID,
            /** {@code separatorValue} is a CSS class name; the first match is used. */
            ST_DIV_CLASS,
            /** {@code separatorValue} is a tag name; the first match is used. */
            ST_ARTICLE
        }

        public SeparatorType separator;
        public String separatorValue;
        /** Emitted as the first text part of the page. */
        public String title;
        /** Image link registered with the image library when the record is created from the property tree. */
        public String topicImg;

        public TextFileRecord(String url, String baseUrl, String localUrl, SeparatorType separator, String separatorValue, String title, String topicImg)
        {
            this.url = url;
            this.baseUrl = baseUrl;
            this.localUrl = localUrl;
            this.separator = separator;
            this.separatorValue = separatorValue;
            this.title = title;
            this.topicImg = topicImg;
        }
    }

    /** File-name suffixes (lower case) that mark an {@code <img src>} as a downloadable image. */
    private static final String[] IMAGE_EXTENSIONS = { ".png", ".jpg", ".jpeg", ".bmp" };

    private static HtmlDownloadManager instance = null;

    /**
     * Returns the shared manager, creating it on first use.
     * NOTE(review): the lazy creation is not synchronized; confirm it is only
     * ever called from one thread.
     */
    public static HtmlDownloadManager getInstance()
    {
        if (instance == null)
        {
            instance = new HtmlDownloadManager();
        }
        return instance;
    }

    /**
     * Hands the shared manager to
     * {@code UniversalActionWithCallback.PerformActionIfNotPerforming} with a
     * null query, the given callback and the key "TextFileDownloadManager".
     *
     * @param callback forwarded unchanged to the action runner
     * @return whatever the action runner returns
     */
    public static RemoveCallbackInterface DownloadHtmlFiles(UpdateAndFinishCallbackInterface callback)
    {
        return UniversalActionWithCallback.PerformActionIfNotPerforming(HtmlDownloadManager.getInstance(), null, callback, "TextFileDownloadManager");
    }

    /**
     * Result of the last completed run. Doubles as the monitor guarding its own
     * contents, hence final.
     */
    private final ArrayList<TextFileRecord> globalHtmlUrlList = new ArrayList<TextFileRecord>();

    /**
     * Reads the URL list, downloads and processes every page, publishes the
     * records and then calls {@code callbackHolder.OnUpdated(null)}.
     *
     * Pages that cannot be downloaded, or whose configured main element is
     * missing, are still published but with an empty {@code contentArr}. When
     * the URL list itself cannot be read, the previously published list is left
     * untouched; the callback is invoked in every case.
     *
     * @param query          not used by this implementation
     * @param callbackHolder notified once processing has finished
     */
    @Override
    public void InThreadAction(Bundle query, UpdateCallbackHolder callbackHolder)
    {
        SimplePropertyTree ptree = loadUrlListTree();
        if (ptree != null)
        {
            ArrayList<TextFileRecord> htmlUrlList = getListFromPropertyTree(ptree);
            for (TextFileRecord record : htmlUrlList)
            {
                String fileContent = DownloadFunctions.getXmlFromUrl(record.url);
                if (fileContent != null)
                {
                    processPage(record, fileContent);
                }
            }
            synchronized (globalHtmlUrlList)
            {
                globalHtmlUrlList.clear();
                globalHtmlUrlList.addAll(htmlUrlList);
            }
        }
        callbackHolder.OnUpdated(null);
    }

    /** Parses the "urllist.xml" asset; returns null (after printing the stack trace) when it cannot be read or parsed. */
    private static SimplePropertyTree loadUrlListTree()
    {
        InputStream is = null;
        try
        {
            is = ContextHolder.getContext().getAssets().open("urllist.xml");
            return SimplePropertyTree.parse(is);
        } catch (IOException e)
        {
            e.printStackTrace();
        } catch (XmlPullParserException e)
        {
            e.printStackTrace();
        } finally
        {
            if (is != null)
            {
                try
                {
                    is.close();
                } catch (IOException e)
                {
                    e.printStackTrace();
                }
            }
        }
        return null;
    }

    /** Cleans one downloaded page, fills {@code record.contentArr} and writes the reduced HTML to {@code record.localUrl}. */
    private static void processPage(TextFileRecord record, String fileContent)
    {
        Document doc = Jsoup.parse(fileContent);

        // Drop everything that only makes sense online or is invisible anyway.
        doc.getElementsByTag("link").remove();
        doc.getElementsByTag("script").remove();
        doc.getElementsByAttributeValue("style", "display: none;").remove();
        doc.getElementsByTag("style").remove();

        // Neutralise links so the stored page cannot navigate away.
        for (Element a : doc.getElementsByTag("a"))
        {
            a.removeAttr("href");
            a.attributes().put("href", "#");
        }

        // Register every image of the page, including ones outside the main element.
        for (Element img : doc.getElementsByTag("img"))
        {
            String link = ImageTagToLink(record.baseUrl, img);
            if (!link.equals(""))
            {
                ImageManager.getInstance().AddImageToLibrary(link);
            }
        }

        Element body = doc.body();
        Element found = findMainElement(body, record);
        if (found == null)
        {
            // The page layout no longer matches the configuration. Skip this
            // page instead of failing with a NullPointerException, which would
            // abort all remaining pages and the completion callback.
            System.err.println("HtmlDownloadManager: main content element not found for " + record.url);
            return;
        }

        // Replace the body's element children with a copy of the main element.
        Element mainDiv = found.clone();
        body.children().remove();
        body.appendChild(mainDiv);
        String newHtmlCode = doc.html();

        record.contentArr.add(new TextFileRecord.DocPart(TextFileRecord.DocPart.DocPartType.DPT_TEXT, record.title));
        for (Element subDiv : mainDiv.children())
        {
            // Images of a child are listed before the child's text.
            for (Element subDivImg : subDiv.getElementsByTag("img"))
            {
                String imageLink = ImageTagToLink(record.baseUrl, subDivImg);
                if (!imageLink.equals(""))
                {
                    record.contentArr.add(new TextFileRecord.DocPart(TextFileRecord.DocPart.DocPartType.DPT_IMAGE, imageLink));
                }
            }
            String text = subDiv.text();
            if (!text.equals(""))
            {
                record.contentArr.add(new TextFileRecord.DocPart(TextFileRecord.DocPart.DocPartType.DPT_TEXT, text));
            }
        }
        record.contentArr.add(new TextFileRecord.DocPart(TextFileRecord.DocPart.DocPartType.DPT_BUTTON, record.localUrl));

        FileFunctions.WriteTextToFile(record.localUrl, newHtmlCode);
    }

    /** Looks up the element selected by the record's separator settings; returns null when it is absent or not configured. */
    private static Element findMainElement(Element body, TextFileRecord record)
    {
        String key = record.separatorValue;
        if (key == null || key.length() == 0)
        {
            return null;
        }
        if (record.separator == TextFileRecord.SeparatorType.ST_DIV_ID)
        {
            return body.getElementById(key);
        }
        if (record.separator == TextFileRecord.SeparatorType.ST_DIV_CLASS)
        {
            return body.getElementsByClass(key).first();
        }
        return body.getElementsByTag(key).first();
    }

    /**
     * Turns the {@code src} of an image tag into a full link.
     *
     * Only sources ending in one of {@link #IMAGE_EXTENSIONS} (compared without
     * regard to case) are accepted. Protocol-relative sources ("//host/...")
     * get "http:" prepended; sources that are neither http nor https get
     * {@code baseUrl} prepended.
     *
     * @return the full link, or "" when the source is not an accepted image
     */
    private static String ImageTagToLink(String baseUrl, Element imageTag)
    {
        String link = imageTag.attributes().get("src");
        if (!hasImageExtension(link))
        {
            return "";
        }
        if (link.startsWith("//"))
        {
            link = "http:" + link;
        }
        // https links are already absolute and must not be prefixed.
        if (!link.startsWith("http://") && !link.startsWith("https://"))
        {
            link = baseUrl + link;
        }
        return link;
    }

    /** True when {@code link} ends with one of the accepted image suffixes, ignoring case. */
    private static boolean hasImageExtension(String link)
    {
        for (String ext : IMAGE_EXTENSIONS)
        {
            // regionMatches returns false (no exception) when link is shorter than ext.
            if (link.regionMatches(true, link.length() - ext.length(), ext, 0, ext.length()))
            {
                return true;
            }
        }
        return false;
    }

    /**
     * Removes every {@code script} element below {@code e} from the document.
     * {@code e} itself is never removed.
     *
     * The child is detached with {@code Node.remove()} instead of removing it
     * from the list returned by {@code children()}: on jsoup versions where
     * that list is a detached copy, removing from it leaves the DOM unchanged.
     */
    public static void RecursiveRemoveScript(Element e)
    {
        for (Element child : e.children())
        {
            if ("script".equals(child.tagName()))
            {
                child.remove();
            } else
            {
                RecursiveRemoveScript(child);
            }
        }
    }

    /**
     * Returns a new list holding the records published by the last completed
     * run. The list is a copy; the records inside it are shared.
     */
    public ArrayList<TextFileRecord> getGlobalHtmlList()
    {
        synchronized (globalHtmlUrlList)
        {
            return new ArrayList<TextFileRecord>(globalHtmlUrlList);
        }
    }

    /**
     * Builds one record per sub-node of the tree's "list" node and registers
     * each record's {@code topicImg} with the image library.
     *
     * A "separator" value other than "ST_DIV_ID" or "ST_DIV_CLASS" (including
     * a missing one) selects {@code ST_ARTICLE}.
     *
     * @return the records in node order; empty when there is no "list" node
     */
    public static ArrayList<TextFileRecord> getListFromPropertyTree(SimplePropertyTree ptree)
    {
        ArrayList<TextFileRecord> htmlUrlList = new ArrayList<TextFileRecord>();
        SimplePropertyTree listNode = ptree.GetNode("list");
        if (listNode == null)
        {
            return htmlUrlList;
        }
        for (int i = 0; i < listNode.SubNodes.size(); i++)
        {
            String url = listNode.SubNodes.get(i).Get("url");
            String baseUrl = listNode.SubNodes.get(i).Get("baseUrl");
            String localUrl = listNode.SubNodes.get(i).Get("localUrl");
            String separatorName = listNode.SubNodes.get(i).Get("separator");
            String separatorValue = listNode.SubNodes.get(i).Get("separatorValue");
            String title = listNode.SubNodes.get(i).Get("title");
            String topicImg = listNode.SubNodes.get(i).Get("topicImg");

            htmlUrlList.add(new TextFileRecord(url, baseUrl, localUrl, parseSeparator(separatorName), separatorValue, title, topicImg));
            ImageManager.getInstance().AddImageToLibrary(topicImg);
        }
        return htmlUrlList;
    }

    /** Maps the textual separator name to its enum constant; anything unrecognised, including null, yields ST_ARTICLE. */
    private static TextFileRecord.SeparatorType parseSeparator(String name)
    {
        // Constant-first comparison keeps this safe when the attribute is absent.
        if ("ST_DIV_ID".equals(name))
        {
            return TextFileRecord.SeparatorType.ST_DIV_ID;
        }
        if ("ST_DIV_CLASS".equals(name))
        {
            return TextFileRecord.SeparatorType.ST_DIV_CLASS;
        }
        return TextFileRecord.SeparatorType.ST_ARTICLE;
    }
}