// HtmlAgilityPack V1.0 - Simon Mourier

#region

using System;
using System.Collections.Generic;
using System.IO;
using System.Net;
using System.Security;
using System.Security.Permissions;
using System.Text;
using System.Xml;
using System.Xml.Serialization;
using System.Xml.Xsl;
using Microsoft.Win32;
using System.IO.Compression;

#endregion

namespace HtmlAgilityPack
{
    /// <summary>
    /// A utility class to get HTML document from HTTP.
    /// </summary>
    public class HtmlWeb
    {
        #region Delegates

        /// <summary>
        /// Represents the method that will handle the PostResponse event.
        /// </summary>
        /// <param name="request">The HTTP request handed to the handler.</param>
        /// <param name="response">The HTTP response handed to the handler.</param>
        public delegate void PostResponseHandler(HttpWebRequest request, HttpWebResponse response);

        /// <summary>
        /// Represents the method that will handle the PreHandleDocument event.
        /// </summary>
        /// <param name="document">The HTML document handed to the handler.</param>
        public delegate void PreHandleDocumentHandler(HtmlDocument document);

        /// <summary>
        /// Represents the method that will handle the PreRequest event.
        /// </summary>
        /// <param name="request">The HTTP request handed to the handler.</param>
        /// <returns>
        /// A boolean consumed by the caller of the handler.
        /// NOTE(review): its meaning is not visible in this part of the file - confirm at the call site.
        /// </returns>
        public delegate bool PreRequestHandler(HttpWebRequest request);

        #endregion

        #region Fields

        // Backing fields for the public properties of the same names
        // (AutoDetectEncoding, CacheOnly, CachePath, ...).
        private bool _autoDetectEncoding = true;
        private bool _cacheOnly;
        private string _cachePath;
        private bool _fromCache;
        private int _requestDuration;
        private Uri _responseUri;
        private HttpStatusCode _statusCode = HttpStatusCode.OK;
        private int _streamBufferSize = 1024;
        private bool _useCookies;
        private bool _usingCache;
        private string _userAgent = "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:x.x.x) Gecko/20041107 Firefox/x.x";

        // The three members below are public delegate-typed fields, not C# events.

        /// <summary>
        /// Occurs after an HTTP request has been executed.
        /// </summary>
        public PostResponseHandler PostResponse;

        /// <summary>
        /// Occurs before an HTML document is handled.
        /// </summary>
        public PreHandleDocumentHandler PreHandleDocument;

        /// <summary>
        /// Occurs before an HTTP request is executed.
        /// </summary>
public PreRequestHandler PreRequest;

#endregion

#region Static Members

// Lazily built extension -> MIME type table; stays null until MimeTypes is first read.
private static Dictionary<string, string> _mimeTypes;

/// <summary>
/// Gets the built-in table that maps a file extension (with its leading dot) to a MIME content type.
/// </summary>
/// <remarks>
/// The table holds exactly one content type per extension. The list it was built from named
/// several content types for many extensions; adding those with <c>Dictionary.Add</c> threw an
/// <see cref="ArgumentException"/> on the first repeated key and left a truncated table cached.
/// Each extension now appears once, carrying the last content type the list gave for it.
/// Extension lookups ignore case.
/// </remarks>
internal static Dictionary<string, string> MimeTypes
{
    get
    {
        if (_mimeTypes != null)
            return _mimeTypes;

        // Fill a local table and publish it only once it is complete, so a failure while
        // building can never leave a partially filled table behind in the static field.
        Dictionary<string, string> types = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);

        //agentsmith spellcheck disable
        types[".3dm"] = "x-world/x-3dmf";
        types[".3dmf"] = "x-world/x-3dmf";
        types[".a"] = "application/octet-stream";
        types[".aab"] = "application/x-authorware-bin";
        types[".aam"] = "application/x-authorware-map";
        types[".aas"] = "application/x-authorware-seg";
        types[".abc"] = "text/vnd.abc";
        types[".acgi"] = "text/html";
        types[".afl"] = "video/animaflex";
        types[".ai"] = "application/postscript";
        types[".aif"] = "audio/x-aiff";
        types[".aifc"] = "audio/x-aiff";
        types[".aiff"] = "audio/x-aiff";
        types[".aim"] = "application/x-aim";
        types[".aip"] = "text/x-audiosoft-intra";
        types[".ani"] = "application/x-navi-animation";
        types[".aos"] = "application/x-nokia-9000-communicator-add-on-software";
        types[".aps"] = "application/mime";
        types[".arc"] = "application/octet-stream";
        types[".arj"] = "application/octet-stream";
        types[".art"] = "image/x-jg";
        types[".asf"] = "video/x-ms-asf";
        types[".asm"] = "text/x-asm";
        types[".asp"] = "text/asp";
        types[".asx"] = "video/x-ms-asf-plugin";
        types[".au"] = "audio/x-au";
        types[".avi"] = "video/x-msvideo";
        types[".avs"] = "video/avs-video";
        types[".bcpio"] = "application/x-bcpio";
        types[".bin"] = "application/x-macbinary";
        types[".bm"] = "image/bmp";
        types[".bmp"] = "image/x-windows-bmp";
        types[".boo"] = "application/book";
        types[".book"] = "application/book";
        types[".boz"] = "application/x-bzip2";
        types[".bsh"] = "application/x-bsh";
        types[".bz"] = "application/x-bzip";
        types[".bz2"] = "application/x-bzip2";
        types[".c"] = "text/x-c";
        types[".c++"] = "text/plain";
        types[".cat"] = "application/vnd.ms-pki.seccat";
        types[".cc"] = "text/x-c";
        types[".ccad"] = "application/clariscad";
        types[".cco"] = "application/x-cocoa";
        types[".cdf"] = "application/x-netcdf";
        types[".cer"] = "application/x-x509-ca-cert";
        types[".cha"] = "application/x-chat";
        types[".chat"] = "application/x-chat";
        types[".class"] = "application/x-java-class";
        types[".com"] = "text/plain";
        types[".conf"] = "text/plain";
        types[".cpio"] = "application/x-cpio";
        types[".cpp"] = "text/x-c";
        types[".cpt"] = "application/x-cpt";
        types[".crl"] = "application/pkix-crl";
        types[".crt"] = "application/x-x509-user-cert";
        types[".csh"] = "text/x-script.csh";
        types[".css"] = "text/css";
        types[".cxx"] = "text/plain";
        types[".dcr"] = "application/x-director";
        types[".deepv"] = "application/x-deepv";
        types[".def"] = "text/plain";
        types[".der"] = "application/x-x509-ca-cert";
        types[".dif"] = "video/x-dv";
        types[".dir"] = "application/x-director";
        types[".dl"] = "video/x-dl";
        types[".doc"] = "application/msword";
        types[".dot"] = "application/msword";
        types[".dp"] = "application/commonground";
        types[".drw"] = "application/drafting";
        types[".dump"] = "application/octet-stream";
        types[".dv"] = "video/x-dv";
        types[".dvi"] = "application/x-dvi";
        types[".dwf"] = "model/vnd.dwf";
        types[".dwg"] = "image/x-dwg";
        types[".dxf"] = "image/x-dwg";
        types[".dxr"] = "application/x-director";
        types[".el"] = "text/x-script.elisp";
        types[".elc"] = "application/x-elc";
        types[".env"] = "application/x-envoy";
        types[".eps"] = "application/postscript";
        types[".es"] = "application/x-esrehber";
        types[".etx"] = "text/x-setext";
        types[".evy"] = "application/x-envoy";
        types[".exe"] = "application/octet-stream";
        types[".f"] = "text/x-fortran";
        types[".f77"] = "text/x-fortran";
        types[".f90"] = "text/x-fortran";
        types[".fdf"] = "application/vnd.fdf";
        types[".fif"] = "image/fif";
        types[".fli"] = "video/x-fli";
        types[".flo"] = "image/florian";
        types[".flx"] = "text/vnd.fmi.flexstor";
        types[".fmf"] = "video/x-atomic3d-feature";
        types[".for"] = "text/x-fortran";
        types[".fpx"] = "image/vnd.net-fpx";
        types[".frl"] = "application/freeloader";
        types[".funk"] = "audio/make";
        types[".g"] = "text/plain";
        types[".g3"] = "image/g3fax";
        types[".gif"] = "image/gif";
        types[".gl"] = "video/x-gl";
        types[".gsd"] = "audio/x-gsm";
        types[".gsm"] = "audio/x-gsm";
        types[".gsp"] = "application/x-gsp";
        types[".gss"] = "application/x-gss";
        types[".gtar"] = "application/x-gtar";
        types[".gz"] = "application/x-gzip";
        types[".gzip"] = "multipart/x-gzip";
        types[".h"] = "text/x-h";
        types[".hdf"] = "application/x-hdf";
        types[".help"] = "application/x-helpfile";
        types[".hgl"] = "application/vnd.hp-hpgl";
        types[".hh"] = "text/x-h";
        types[".hlb"] = "text/x-script";
        types[".hlp"] = "application/x-winhelp";
        types[".hpg"] = "application/vnd.hp-hpgl";
        types[".hpgl"] = "application/vnd.hp-hpgl";
        types[".hqx"] = "application/x-mac-binhex40";
        types[".hta"] = "application/hta";
        types[".htc"] = "text/x-component";
        types[".htm"] = "text/html";
        types[".html"] = "text/html";
        types[".htmls"] = "text/html";
        types[".htt"] = "text/webviewhtml";
        types[".htx"] = "text/html";
        types[".ice"] = "x-conference/x-cooltalk";
        types[".ico"] = "image/x-icon";
        types[".idc"] = "text/plain";
        types[".ief"] = "image/ief";
        types[".iefs"] = "image/ief";
        types[".iges"] = "model/iges";
        types[".igs"] = "model/iges";
        types[".ima"] = "application/x-ima";
        types[".imap"] = "application/x-httpd-imap";
        types[".inf"] = "application/inf";
        types[".ins"] = "application/x-internett-signup";
        types[".ip"] = "application/x-ip2";
        types[".isu"] = "video/x-isvideo";
        types[".it"] = "audio/it";
        types[".iv"] = "application/x-inventor";
        types[".ivr"] = "i-world/i-vrml";
        types[".ivy"] = "application/x-livescreen";
        types[".jam"] = "audio/x-jam";
        types[".jav"] = "text/x-java-source";
        types[".java"] = "text/x-java-source";
        types[".jcm"] = "application/x-java-commerce";
        types[".jfif"] = "image/pjpeg";
        types[".jfif-tbnl"] = "image/jpeg";
        types[".jpe"] = "image/pjpeg";
        types[".jpeg"] = "image/pjpeg";
        types[".jpg"] = "image/pjpeg";
        types[".jps"] = "image/x-jps";
        types[".js"] = "text/ecmascript";
        types[".jut"] = "image/jutvision";
        types[".kar"] = "music/x-karaoke";
        types[".ksh"] = "text/x-script.ksh";
        types[".la"] = "audio/x-nspaudio";
        types[".lam"] = "audio/x-liveaudio";
        types[".latex"] = "application/x-latex";
        types[".lha"] = "application/x-lha";
        types[".lhx"] = "application/octet-stream";
        types[".list"] = "text/plain";
        types[".lma"] = "audio/x-nspaudio";
        types[".log"] = "text/plain";
        types[".lsp"] = "text/x-script.lisp";
        types[".lst"] = "text/plain";
        types[".lsx"] = "text/x-la-asf";
        types[".ltx"] = "application/x-latex";
        types[".lzh"] = "application/x-lzh";
        types[".lzx"] = "application/x-lzx";
        types[".m"] = "text/x-m";
        types[".m1v"] = "video/mpeg";
        types[".m2a"] = "audio/mpeg";
        types[".m2v"] = "video/mpeg";
        types[".m3u"] = "audio/x-mpequrl";
        types[".man"] = "application/x-troff-man";
        types[".map"] = "application/x-navimap";
        types[".mar"] = "text/plain";
        types[".mbd"] = "application/mbedlet";
        types[".mc$"] = "application/x-magic-cap-package-1.0";
        types[".mcd"] = "application/x-mathcad";
        types[".mcf"] = "text/mcf";
        types[".mcp"] = "application/netmc";
        types[".me"] = "application/x-troff-me";
        types[".mht"] = "message/rfc822";
        types[".mhtml"] = "message/rfc822";
        types[".mid"] = "x-music/x-midi";
        types[".midi"] = "x-music/x-midi";
        types[".mif"] = "application/x-mif";
        types[".mime"] = "www/mime";
        types[".mjf"] = "audio/x-vnd.audioexplosion.mjuicemediafile";
        types[".mjpg"] = "video/x-motion-jpeg";
        types[".mm"] = "application/x-meme";
        types[".mme"] = "application/base64";
        types[".mod"] = "audio/x-mod";
        types[".moov"] = "video/quicktime";
        types[".mov"] = "video/quicktime";
        types[".movie"] = "video/x-sgi-movie";
        types[".mp2"] = "video/x-mpeq2a";
        types[".mp3"] = "video/x-mpeg";
        types[".mpa"] = "video/mpeg";
        types[".mpc"] = "application/x-project";
        types[".mpe"] = "video/mpeg";
        types[".mpeg"] = "video/mpeg";
        types[".mpg"] = "video/mpeg";
        types[".mpga"] = "audio/mpeg";
        types[".mpp"] = "application/vnd.ms-project";
        types[".mpt"] = "application/x-project";
        types[".mpv"] = "application/x-project";
        types[".mpx"] = "application/x-project";
        types[".mrc"] = "application/marc";
        types[".ms"] = "application/x-troff-ms";
        types[".mv"] = "video/x-sgi-movie";
        types[".my"] = "audio/make";
        types[".mzz"] = "application/x-vnd.audioexplosion.mzz";
        types[".nap"] = "image/naplps";
        types[".naplps"] = "image/naplps";
        types[".nc"] = "application/x-netcdf";
        types[".ncm"] = "application/vnd.nokia.configuration-message";
        types[".nif"] = "image/x-niff";
        types[".niff"] = "image/x-niff";
        types[".nix"] = "application/x-mix-transfer";
        types[".nsc"] = "application/x-conference";
        types[".nvd"] = "application/x-navidoc";
        types[".o"] = "application/octet-stream";
        types[".oda"] = "application/oda";
        types[".omc"] = "application/x-omc";
        types[".omcd"] = "application/x-omcdatamaker";
        types[".omcr"] = "application/x-omcregerator";
        types[".p"] = "text/x-pascal";
        types[".p10"] = "application/x-pkcs10";
        types[".p12"] = "application/x-pkcs12";
        types[".p7a"] = "application/x-pkcs7-signature";
        types[".p7c"] = "application/x-pkcs7-mime";
        types[".p7m"] = "application/x-pkcs7-mime";
        types[".p7r"] = "application/x-pkcs7-certreqresp";
        types[".p7s"] = "application/pkcs7-signature";
        types[".part"] = "application/pro_eng";
        types[".pas"] = "text/pascal";
        types[".pbm"] = "image/x-portable-bitmap";
        types[".pcl"] = "application/x-pcl";
        types[".pct"] = "image/x-pict";
        types[".pcx"] = "image/x-pcx";
        types[".pdb"] = "chemical/x-pdb";
        types[".pdf"] = "application/pdf";
        types[".pfunk"] = "audio/make.my.funk";
        types[".pgm"] = "image/x-portable-greymap";
        types[".pic"] = "image/pict";
        types[".pict"] = "image/pict";
        types[".pkg"] = "application/x-newton-compatible-pkg";
        types[".pko"] = "application/vnd.ms-pki.pko";
        types[".pl"] = "text/x-script.perl";
        types[".plx"] = "application/x-pixclscript";
        types[".pm"] = "text/x-script.perl-module";
        types[".pm4"] = "application/x-pagemaker";
        types[".pm5"] = "application/x-pagemaker";
        types[".png"] = "image/png";
        types[".pnm"] = "image/x-portable-anymap";
        types[".pot"] = "application/vnd.ms-powerpoint";
        types[".pov"] = "model/x-pov";
        types[".ppa"] = "application/vnd.ms-powerpoint";
        types[".ppm"] = "image/x-portable-pixmap";
        types[".pps"] = "application/vnd.ms-powerpoint";
        types[".ppt"] = "application/x-mspowerpoint";
        types[".ppz"] = "application/mspowerpoint";
        types[".pre"] = "application/x-freelance";
        types[".prt"] = "application/pro_eng";
        types[".ps"] = "application/postscript";
        types[".psd"] = "application/octet-stream";
        types[".pvu"] = "paleovu/x-pv";
        types[".pwz"] = "application/vnd.ms-powerpoint";
        types[".py"] = "text/x-script.phyton";
        types[".pyc"] = "applicaiton/x-bytecode.python";
        types[".qcp"] = "audio/vnd.qcelp";
        types[".qd3"] = "x-world/x-3dmf";
        types[".qd3d"] = "x-world/x-3dmf";
        types[".qif"] = "image/x-quicktime";
        types[".qt"] = "video/quicktime";
        types[".qtc"] = "video/x-qtc";
        types[".qti"] = "image/x-quicktime";
        types[".qtif"] = "image/x-quicktime";
        types[".ra"] = "audio/x-realaudio";
        types[".ram"] = "audio/x-pn-realaudio";
        types[".ras"] = "image/x-cmu-raster";
        types[".rast"] = "image/cmu-raster";
        types[".rexx"] = "text/x-script.rexx";
        types[".rf"] = "image/vnd.rn-realflash";
        types[".rgb"] = "image/x-rgb";
        types[".rm"] = "audio/x-pn-realaudio";
        types[".rmi"] = "audio/mid";
        types[".rmm"] = "audio/x-pn-realaudio";
        types[".rmp"] = "audio/x-pn-realaudio-plugin";
        types[".rng"] = "application/vnd.nokia.ringing-tone";
        types[".rnx"] = "application/vnd.rn-realplayer";
        types[".roff"] = "application/x-troff";
        types[".rp"] = "image/vnd.rn-realpix";
        types[".rpm"] = "audio/x-pn-realaudio-plugin";
        types[".rt"] = "text/vnd.rn-realtext";
        types[".rtf"] = "text/richtext";
        types[".rtx"] = "text/richtext";
        types[".rv"] = "video/vnd.rn-realvideo";
        types[".s"] = "text/x-asm";
        types[".s3m"] = "audio/s3m";
        types[".saveme"] = "application/octet-stream";
        types[".sbk"] = "application/x-tbook";
        types[".scm"] = "video/x-scm";
        types[".sdml"] = "text/plain";
        types[".sdp"] = "application/x-sdp";
        types[".sdr"] = "application/sounder";
        types[".sea"] = "application/x-sea";
        types[".set"] = "application/set";
        types[".sgm"] = "text/x-sgml";
        types[".sgml"] = "text/x-sgml";
        types[".sh"] = "text/x-script.sh";
        types[".shar"] = "application/x-shar";
        types[".shtml"] = "text/x-server-parsed-html";
        types[".sid"] = "audio/x-psid";
        types[".sit"] = "application/x-stuffit";
        types[".skd"] = "application/x-koan";
        types[".skm"] = "application/x-koan";
        types[".skp"] = "application/x-koan";
        types[".skt"] = "application/x-koan";
        types[".sl"] = "application/x-seelogo";
        types[".smi"] = "application/smil";
        types[".smil"] = "application/smil";
        types[".snd"] = "audio/x-adpcm";
        types[".sol"] = "application/solids";
        types[".spc"] = "text/x-speech";
        types[".spl"] = "application/futuresplash";
        types[".spr"] = "application/x-sprite";
        types[".sprite"] = "application/x-sprite";
        types[".src"] = "application/x-wais-source";
        types[".ssi"] = "text/x-server-parsed-html";
        types[".ssm"] = "application/streamingmedia";
        types[".sst"] = "application/vnd.ms-pki.certstore";
        types[".step"] = "application/step";
        types[".stl"] = "application/x-navistyle";
        types[".stp"] = "application/step";
        types[".sv4cpio"] = "application/x-sv4cpio";
        types[".sv4crc"] = "application/x-sv4crc";
        types[".svf"] = "image/x-dwg";
        types[".svr"] = "x-world/x-svr";
        types[".swf"] = "application/x-shockwave-flash";
        types[".t"] = "application/x-troff";
        types[".talk"] = "text/x-speech";
        types[".tar"] = "application/x-tar";
        types[".tbk"] = "application/x-tbook";
        types[".tcl"] = "text/x-script.tcl";
        types[".tcsh"] = "text/x-script.tcsh";
        types[".tex"] = "application/x-tex";
        types[".texi"] = "application/x-texinfo";
        types[".texinfo"] = "application/x-texinfo";
        types[".text"] = "text/plain";
        types[".tgz"] = "application/x-compressed";
        types[".tif"] = "image/x-tiff";
        types[".tiff"] = "image/x-tiff";
        types[".tr"] = "application/x-troff";
        types[".tsi"] = "audio/tsp-audio";
        types[".tsp"] = "audio/tsplayer";
        types[".tsv"] = "text/tab-separated-values";
        types[".turbot"] = "image/florian";
        types[".txt"] = "text/plain";
        types[".uil"] = "text/x-uil";
        types[".uni"] = "text/uri-list";
        types[".unis"] = "text/uri-list";
        types[".unv"] = "application/i-deas";
        types[".uri"] = "text/uri-list";
        types[".uris"] = "text/uri-list";
        types[".ustar"] = "multipart/x-ustar";
        types[".uu"] = "text/x-uuencode";
        types[".uue"] = "text/x-uuencode";
        types[".vcd"] = "application/x-cdlink";
        types[".vcs"] = "text/x-vcalendar";
        types[".vda"] = "application/vda";
        types[".vdo"] = "video/vdo";
        types[".vew"] = "application/groupwise";
        types[".viv"] = "video/vnd.vivo";
        types[".vivo"] = "video/vnd.vivo";
        types[".vmd"] = "application/vocaltec-media-desc";
        types[".vmf"] = "application/vocaltec-media-file";
        types[".voc"] = "audio/x-voc";
        types[".vos"] = "video/vosaic";
        types[".vox"] = "audio/voxware";
        types[".vqe"] = "audio/x-twinvq-plugin";
        types[".vqf"] = "audio/x-twinvq";
        types[".vql"] = "audio/x-twinvq-plugin";
        types[".vrml"] = "x-world/x-vrml";
        types[".vrt"] = "x-world/x-vrt";
        types[".vsd"] = "application/x-visio";
        types[".vst"] = "application/x-visio";
        types[".vsw"] = "application/x-visio";
        types[".w60"] = "application/wordperfect6.0";
        types[".w61"] = "application/wordperfect6.1";
        types[".w6w"] = "application/msword";
        types[".wav"] = "audio/x-wav";
        types[".wb1"] = "application/x-qpro";
        types[".wbmp"] = "image/vnd.wap.wbmp";
        types[".web"] = "application/vnd.xara";
        types[".wiz"] = "application/msword";
        types[".wk1"] = "application/x-123";
        types[".wmf"] = "windows/metafile";
        types[".wml"] = "text/vnd.wap.wml";
        types[".wmlc"] = "application/vnd.wap.wmlc";
        types[".wmls"] = "text/vnd.wap.wmlscript";
        types[".wmlsc"] = "application/vnd.wap.wmlscriptc";
        types[".word"] = "application/msword";
        types[".wp"] = "application/wordperfect";
        types[".wp5"] = "application/wordperfect6.0";
        types[".wp6"] = "application/wordperfect";
        types[".wpd"] = "application/x-wpwin";
        types[".wq1"] = "application/x-lotus";
        types[".wri"] = "application/x-wri";
        types[".wrl"] = "x-world/x-vrml";
        types[".wrz"] = "x-world/x-vrml";
        types[".wsc"] = "text/scriplet";
        types[".wsrc"] = "application/x-wais-source";
        types[".wtk"] = "application/x-wintalk";
        types[".xbm"] = "image/xbm";
        types[".xdr"] = "video/x-amt-demorun";
        types[".xgz"] = "xgl/drawing";
        types[".xif"] = "image/vnd.xiff";
        types[".xl"] = "application/excel";
        types[".xla"] = "application/x-msexcel";
        types[".xlb"] = "application/x-excel";
        types[".xlc"] = "application/x-excel";
        types[".xld"] = "application/x-excel";
        types[".xlk"] = "application/x-excel";
        types[".xll"] = "application/x-excel";
        types[".xlm"] = "application/x-excel";
        types[".xls"] = "application/x-msexcel";
        types[".xlt"] = "application/x-excel";
        types[".xlv"] = "application/x-excel";
        types[".xlw"] = "application/x-msexcel";
        types[".xm"] = "audio/xm";
        types[".xml"] = "text/xml";
        types[".xmz"] = "xgl/movie";
        types[".xpix"] = "application/x-vnd.ls-xpix";
        types[".xpm"] = "image/xpm";
        types[".x-png"] = "image/png";
        types[".xsr"] = "video/x-amt-showrun";
        types[".xwd"] = "image/x-xwindowdump";
        types[".xyz"] = "chemical/x-pdb";
        types[".z"] = "application/x-compressed";
        types[".zip"] = "multipart/x-zip";
        types[".zoo"] = "application/octet-stream";
        types[".zsh"] = "text/x-script.zsh";
        //agentsmith spellcheck enable

        _mimeTypes = types;
        return _mimeTypes;
    }
}

#endregion

#region Properties

/// <summary>
/// Gets or Sets a value indicating if document encoding must be automatically detected.
/// </summary>
public bool AutoDetectEncoding
{
    get { return _autoDetectEncoding; }
    set { _autoDetectEncoding = value; }
}

/// <summary>
/// Gets or Sets a value indicating whether to get document only from the cache.
/// If this is set to true and document is not found in the cache, nothing will be loaded.
/// </summary>
/// <exception cref="HtmlWebException">Set to true while <see cref="UsingCache"/> is false.</exception>
public bool CacheOnly
{
    get { return _cacheOnly; }
    set
    {
        if (value && !UsingCache)
        {
            throw new HtmlWebException("Cache is not enabled. Set UsingCache to true first.");
        }
        _cacheOnly = value;
    }
}

/// <summary>
/// Gets or Sets the cache path. If null, no caching mechanism will be used.
/// </summary>
public string CachePath
{
    get { return _cachePath; }
    set { _cachePath = value; }
}

/// <summary>
/// Gets a value indicating if the last document was retrieved from the cache.
/// </summary>
public bool FromCache
{
    get { return _fromCache; }
}

/// <summary>
/// Gets the last request duration in milliseconds.
/// </summary>
public int RequestDuration
{
    get { return _requestDuration; }
}

/// <summary>
/// Gets the URI of the Internet resource that actually responded to the request.
/// </summary>
public Uri ResponseUri
{
    get { return _responseUri; }
}

/// <summary>
/// Gets the last request status.
/// </summary>
public HttpStatusCode StatusCode
{
    get { return _statusCode; }
}

/// <summary>
/// Gets or Sets the size of the buffer used for memory operations.
/// </summary>
/// <exception cref="ArgumentException">The assigned size is zero or negative.</exception>
public int StreamBufferSize
{
    get { return _streamBufferSize; }
    set
    {
        // Validate the incoming value, not the current field. Checking the field let
        // non-positive sizes through and then rejected every later assignment.
        if (value <= 0)
        {
            throw new ArgumentException("Size must be greater than zero.");
        }
        _streamBufferSize = value;
    }
}

/// <summary>
/// Gets or Sets a value indicating if cookies will be stored.
/// </summary>
public bool UseCookies
{
    get { return _useCookies; }
    set { _useCookies = value; }
}

/// <summary>
/// Gets or Sets the User Agent HTTP 1.1 header sent on any webrequest
/// </summary>
public string UserAgent
{
    get { return _userAgent; }
    set { _userAgent = value; }
}

/// <summary>
/// Gets or Sets a value indicating whether the caching mechanisms should be used or not.
/// </summary>
///
public bool UsingCache
{
    // Caching is only reported as active when a cache path has been configured.
    get { return _cachePath != null && _usingCache; }
    set
    {
        if (value && _cachePath == null)
        {
            throw new HtmlWebException("You need to define a CachePath first.");
        }

        _usingCache = value;
    }
}

#endregion

#region Public Methods

/// <summary>
/// Gets the MIME content type for a given path extension.
/// </summary>
/// <remarks>
/// When registry access is not granted the built-in extension table is used; otherwise the
/// "Content Type" value registered under the extension key in HKEY_CLASSES_ROOT is returned.
/// </remarks>
/// <param name="extension">The input path extension, including the leading dot.</param>
/// <param name="def">The default content type to return when the extension is unknown or any error occurs.</param>
/// <returns>The path extension's MIME content type, or <paramref name="def"/>.</returns>
public static string GetContentTypeForExtension(string extension, string def)
{
    if (string.IsNullOrEmpty(extension))
    {
        return def;
    }

    // Partial trust: the registry is off limits, so answer from the built-in table only.
    // (The previous code went on to try the registry anyway and overwrote this result.)
    if (!SecurityManager.IsGranted(new RegistryPermission(PermissionState.Unrestricted)))
    {
        string known;
        return MimeTypes.TryGetValue(extension, out known) ? known : def;
    }

    // Registry access is granted. The previous code guarded this lookup with a negated
    // DnsPermission test, so under full trust it never ran and "" was always returned.
    try
    {
        using (RegistryKey key = Registry.ClassesRoot.OpenSubKey(extension, false))
        {
            if (key == null)
            {
                return def;
            }

            // The MIME type is stored in the named "Content Type" value; the key's
            // default value is not a content type.
            string registered = key.GetValue("Content Type") as string;
            return string.IsNullOrEmpty(registered) ? def : registered;
        }
    }
    catch (Exception)
    {
        // Deliberately best-effort: any registry or security failure yields the default.
        return def;
    }
}

///
/// Gets the path extension for a given MIME content type.
///
/// The input MIME content type.
/// The default path extension to return if any error occurs.
/// The MIME content type's path extension.
public static string GetExtensionForContentType(string contentType, string def)
{
    if (string.IsNullOrEmpty(contentType))
    {
        return def;
    }

    // Partial trust: reverse-lookup the built-in table (first matching extension wins).
    if (!SecurityManager.IsGranted(new RegistryPermission(PermissionState.Unrestricted)))
    {
        foreach (KeyValuePair<string, string> pair in MimeTypes)
        {
            if (pair.Value == contentType)
            {
                // Return the extension (the key). The previous code returned pair.Value,
                // i.e. it echoed the content type back to the caller.
                return pair.Key;
            }
        }

        return def;
    }

    // Registry access is granted: read the "Extension" value registered for the MIME type.
    try
    {
        using (RegistryKey key = Registry.ClassesRoot.OpenSubKey(@"MIME\Database\Content Type\" + contentType, false))
        {
            // An unregistered content type yields "" here, exactly as before.
            return key != null ? (string)key.GetValue("Extension", def) : "";
        }
    }
    catch (Exception)
    {
        // Deliberately best-effort: any registry or security failure yields the default.
        return def;
    }
}

/// <summary>
/// Creates an instance of the given type from the specified Internet resource.
/// </summary>
/// <param name="url">The requested URL, such as "http://Myserver/Mypath/Myfile.asp".</param>
/// <param name="type">The requested type.</param>
/// <returns>A newly created instance.</returns>
public object CreateInstance(string url, Type type)
{
    return CreateInstance(url, null, null, type);
}

/// <summary>
/// Creates an instance of the given type from the specified Internet resource.
/// </summary>
/// <param name="htmlUrl">The requested URL, such as "http://Myserver/Mypath/Myfile.asp".</param>
/// <param name="xsltUrl">The URL that specifies the XSLT stylesheet to load.</param>
/// <param name="xsltArgs">An XsltArgumentList containing the namespace-qualified arguments used as input to the transform.</param>
/// <param name="type">The requested type.</param>
/// <returns>A newly created instance.</returns>
public object CreateInstance(string htmlUrl, string xsltUrl, XsltArgumentList xsltArgs, Type type)
{
    return CreateInstance(htmlUrl, xsltUrl, xsltArgs, type, null);
}

///
/// Creates an instance of the given type from the specified Internet resource.
///
/// The requested URL, such as "http://Myserver/Mypath/Myfile.asp".
/// The URL that specifies the XSLT stylesheet to load.
/// An XsltArgumentList containing the namespace-qualified arguments used as input to the transform.
/// The requested type.
/// A file path where the temporary XML before transformation will be saved. Mostly used for debugging purposes.
/// An newly created instance.
public object CreateInstance(string htmlUrl, string xsltUrl, XsltArgumentList xsltArgs, Type type, string xmlPath)
{
    // Render the (optionally transformed) document to an in-memory XML string.
    // The writers are disposed even when loading or transforming throws.
    string xml;
    using (StringWriter sw = new StringWriter())
    using (XmlTextWriter writer = new XmlTextWriter(sw))
    {
        if (xsltUrl == null)
        {
            // Without a stylesheet the raw document is written and xmlPath is not used.
            LoadHtmlAsXml(htmlUrl, writer);
        }
        else
        {
            // The five-argument overload accepts a null xmlPath, so no extra branch is needed.
            LoadHtmlAsXml(htmlUrl, xsltUrl, xsltArgs, writer, xmlPath);
        }

        writer.Flush();
        xml = sw.ToString();
    }

    XmlSerializer serializer = new XmlSerializer(type);
    try
    {
        using (StringReader sr = new StringReader(xml))
        using (XmlTextReader reader = new XmlTextReader(sr))
        {
            return serializer.Deserialize(reader);
        }
    }
    catch (InvalidOperationException ex)
    {
        // Same exception type and message as before (callers may depend on both), but the
        // original failure is now preserved as InnerException instead of being discarded.
        throw new Exception(ex + ", --- xml:" + xml, ex);
    }
}

/// <summary>
/// Gets an HTML document from an Internet resource and saves it to the specified file,
/// using the GET method.
/// </summary>
/// <param name="url">The requested URL, such as "http://Myserver/Mypath/Myfile.asp".</param>
/// <param name="path">The location of the file where you want to save the document.</param>
public void Get(string url, string path)
{
    Get(url, path, "GET");
}

/// <summary>
/// Gets an HTML document from an Internet resource and saves it to the specified file,
/// using the GET method. Proxy aware.
/// </summary>
/// <param name="url">The requested URL, such as "http://Myserver/Mypath/Myfile.asp".</param>
/// <param name="path">The location of the file where you want to save the document.</param>
/// <param name="proxy">Proxy to use with this request.</param>
/// <param name="credentials">Credentials to use when authenticating.</param>
public void Get(string url, string path, WebProxy proxy, NetworkCredential credentials)
{
    Get(url, path, proxy, credentials, "GET");
}

///
/// Gets an HTML document from an Internet resource and saves it to the specified file.
///
/// The requested URL, such as "http://Myserver/Mypath/Myfile.asp".
/// The location of the file where you want to save the document.
/// The HTTP method used to open the connection, such as GET, POST, PUT, or PROPFIND.
public void Get(string url, string path, string method)
{
    // Same as the proxy-aware overload with no proxy and no credentials.
    Get(url, path, null, null, method);
}

/// <summary>
/// Gets an HTML document from an Internet resource and saves it to the specified file. Understands proxies.
/// </summary>
/// <param name="url">The requested URL, such as "http://Myserver/Mypath/Myfile.asp".</param>
/// <param name="path">The location of the file where you want to save the document.</param>
/// <param name="proxy">Proxy to use with this request; may be null.</param>
/// <param name="credentials">Credentials to use when authenticating; may be null.</param>
/// <param name="method">The HTTP method used to open the connection, such as GET, POST, PUT, or PROPFIND.</param>
/// <exception cref="HtmlWebException">The URL scheme is neither http nor https.</exception>
public void Get(string url, string path, WebProxy proxy, NetworkCredential credentials, string method)
{
    Uri uri = new Uri(url);
    bool isWeb = uri.Scheme == Uri.UriSchemeHttps || uri.Scheme == Uri.UriSchemeHttp;
    if (!isWeb)
    {
        throw new HtmlWebException("Unsupported uri scheme: '" + uri.Scheme + "'.");
    }

    Get(uri, method, path, null, proxy, credentials);
}

/// <summary>
/// Gets the cache file path for a specified url.
/// </summary>
/// <remarks>
/// The path is built from the host and the absolute path of the url. A url whose path ends
/// with '/' (including a site root) is stored as ".htm" inside the matching directory.
/// NOTE(review): the query string is not part of the cache key, so urls that differ only
/// by query share one cache file.
/// </remarks>
/// <param name="uri">The url for which to retrieve the cache path. May not be null.</param>
/// <returns>The cache file path.</returns>
/// <exception cref="ArgumentNullException"><paramref name="uri"/> is null.</exception>
/// <exception cref="HtmlWebException">The cache is not enabled.</exception>
public string GetCachePath(Uri uri)
{
    if (uri == null)
    {
        throw new ArgumentNullException("uri");
    }

    if (!UsingCache)
    {
        throw new HtmlWebException("Cache is not enabled. Set UsingCache to true first.");
    }

    // Always qualify the entry with the host. Previously every site root mapped to the
    // single file "<cache>\.htm", so the roots of different hosts overwrote each other.
    string relative = uri.Host + uri.AbsolutePath;

    // A trailing '/' denotes a directory-like resource; give it a file name so the
    // resulting path can actually be created as a file.
    if (relative.EndsWith("/", StringComparison.Ordinal))
    {
        relative += ".htm";
    }

    // Path.DirectorySeparatorChar is '\' on Windows (unchanged there) and keeps the
    // layout hierarchical on other platforms.
    return Path.Combine(_cachePath, relative.Replace('/', Path.DirectorySeparatorChar));
}

/// <summary>
/// Gets an HTML document from an Internet resource using the GET method.
/// </summary>
/// <param name="url">The requested URL, such as "http://Myserver/Mypath/Myfile.asp".</param>
/// <returns>A new HTML document.</returns>
public HtmlDocument Load(string url)
{
    return Load(url, "GET");
}

///
/// Gets an HTML document from an Internet resource.
///
/// The requested URL, such as "http://Myserver/Mypath/Myfile.asp".
/// Host to use for Proxy
/// Port the Proxy is on
/// User Id for Authentication
/// Password for Authentication
/// A new HTML document.
public HtmlDocument Load(string url, string proxyHost, int proxyPort, string userId, string password)
{
    WebProxy proxy = new WebProxy(proxyHost, proxyPort);
    proxy.BypassProxyOnLocal = true;

    // Credentials are only built when both parts are supplied. The CredentialCache the
    // previous code populated here was never passed anywhere, so it has been removed.
    NetworkCredential credentials = null;
    if (userId != null && password != null)
    {
        credentials = new NetworkCredential(userId, password);
    }

    return Load(url, "GET", proxy, credentials);
}

/// <summary>
/// Loads an HTML document from an Internet resource.
/// </summary>
/// <param name="url">The requested URL, such as "http://Myserver/Mypath/Myfile.asp".</param>
/// <param name="method">The HTTP method used to open the connection, such as GET, POST, PUT, or PROPFIND.</param>
/// <returns>A new HTML document.</returns>
public HtmlDocument Load(string url, string method)
{
    // Identical to the proxy-aware overload with no proxy and no credentials;
    // delegating keeps the scheme handling in one place.
    return Load(url, method, null, null);
}

///
/// Loads an HTML document from an Internet resource.
///
/// The requested URL, such as "http://Myserver/Mypath/Myfile.asp".
/// The HTTP method used to open the connection, such as GET, POST, PUT, or PROPFIND.
/// Proxy to use with this request
/// Credentials to use when authenticating
/// A new HTML document.
public HtmlDocument Load(string url, string method, WebProxy proxy, NetworkCredential credentials)
{
    Uri uri = new Uri(url);
    HtmlDocument doc;

    if (uri.Scheme == Uri.UriSchemeHttps || uri.Scheme == Uri.UriSchemeHttp)
    {
        doc = LoadUrl(uri, method, proxy, credentials);
    }
    else if (uri.Scheme == Uri.UriSchemeFile)
    {
        doc = new HtmlDocument();

        // Use the same parser options as LoadUrl. The previous code assigned
        // OptionAutoCloseOnEnd twice (false, then true), which looks like a copy/paste
        // slip for OptionFixNestedTags.
        doc.OptionAutoCloseOnEnd = false;
        doc.OptionFixNestedTags = true;
        doc.DetectEncodingAndLoad(url, _autoDetectEncoding);
    }
    else
    {
        throw new HtmlWebException("Unsupported uri scheme: '" + uri.Scheme + "'.");
    }

    // Give subscribers a chance to inspect or adjust the document before it is returned.
    if (PreHandleDocument != null)
    {
        PreHandleDocument(doc);
    }

    return doc;
}

/// <summary>
/// Loads an HTML document from an Internet resource and saves it to the specified XmlTextWriter.
/// </summary>
/// <param name="htmlUrl">The requested URL, such as "http://Myserver/Mypath/Myfile.asp".</param>
/// <param name="writer">The XmlTextWriter to which you want to save.</param>
public void LoadHtmlAsXml(string htmlUrl, XmlTextWriter writer)
{
    HtmlDocument doc = Load(htmlUrl);
    doc.Save(writer);
}

/// <summary>
/// Loads an HTML document from an Internet resource and saves it to the specified XmlTextWriter, after an XSLT transformation.
/// </summary>
/// <param name="htmlUrl">The requested URL, such as "http://Myserver/Mypath/Myfile.asp".</param>
/// <param name="xsltUrl">The URL that specifies the XSLT stylesheet to load.</param>
/// <param name="xsltArgs">An XsltArgumentList containing the namespace-qualified arguments used as input to the transform.</param>
/// <param name="writer">The XmlTextWriter to which you want to save.</param>
public void LoadHtmlAsXml(string htmlUrl, string xsltUrl, XsltArgumentList xsltArgs, XmlTextWriter writer)
{
    LoadHtmlAsXml(htmlUrl, xsltUrl, xsltArgs, writer, null);
}

///
/// Loads an HTML document from an Internet resource and saves it to the specified XmlTextWriter, after an XSLT transformation.
///
/// The requested URL, such as "http://Myserver/Mypath/Myfile.asp". May not be null.
/// The URL that specifies the XSLT stylesheet to load.
/// An XsltArgumentList containing the namespace-qualified arguments used as input to the transform.
/// The XmlTextWriter to which you want to save.
/// A file path where the temporary XML before transformation will be saved. Mostly used for debugging purposes.
public void LoadHtmlAsXml(string htmlUrl, string xsltUrl, XsltArgumentList xsltArgs, XmlTextWriter writer, string xmlPath)
{
    if (htmlUrl == null)
    {
        throw new ArgumentNullException("htmlUrl");
    }

    HtmlDocument doc = Load(htmlUrl);

    if (xmlPath != null)
    {
        // Dump the pre-transform XML. The writer is disposed even when Save throws,
        // so the file handle is never left open.
        using (XmlTextWriter debugWriter = new XmlTextWriter(xmlPath, doc.Encoding))
        {
            doc.Save(debugWriter);
        }
    }

    if (xsltArgs == null)
    {
        xsltArgs = new XsltArgumentList();
    }

    // add some useful variables to the xslt doc
    xsltArgs.AddParam("url", "", htmlUrl);
    xsltArgs.AddParam("requestDuration", "", RequestDuration);
    xsltArgs.AddParam("fromCache", "", FromCache);

    XslCompiledTransform xslt = new XslCompiledTransform();
    xslt.Load(xsltUrl);
    xslt.Transform(doc, xsltArgs, writer);
}

#endregion

#region Private Methods

/// <summary>
/// Makes <paramref name="target"/> writable: clears the read-only attribute of an existing
/// file, or creates the missing parent directory of a file that does not exist yet.
/// </summary>
private static void FilePreparePath(string target)
{
    if (File.Exists(target))
    {
        FileAttributes atts = File.GetAttributes(target);
        File.SetAttributes(target, atts & ~FileAttributes.ReadOnly);
        return;
    }

    // A bare file name has no directory part; there is nothing to create in that case
    // (Directory.CreateDirectory would throw on an empty path).
    string dir = Path.GetDirectoryName(target);
    if (!string.IsNullOrEmpty(dir) && !Directory.Exists(dir))
    {
        Directory.CreateDirectory(dir);
    }
}

/// <summary>
/// Returns <paramref name="t"/> truncated to whole seconds.
/// </summary>
private static DateTime RemoveMilliseconds(DateTime t)
{
    return t.AddTicks(-(t.Ticks % TimeSpan.TicksPerSecond));
}

// ReSharper disable UnusedMethodReturnValue.Local
/// <summary>
/// Copies <paramref name="stream"/> to the file at <paramref name="path"/>, then sets the
/// file's last write time to <paramref name="touchDate"/>. The source stream is closed.
/// </summary>
/// <returns>The number of bytes written.</returns>
private static long SaveStream(Stream stream, string path, DateTime touchDate, int streamBufferSize)
// ReSharper restore UnusedMethodReturnValue.Local
{
    FilePreparePath(path);

    long written = 0;
    using (stream)
    using (FileStream fs = new FileStream(path, FileMode.Create, FileAccess.Write))
    {
        byte[] buffer = new byte[streamBufferSize];
        int read;
        while ((read = stream.Read(buffer, 0, buffer.Length)) > 0)
        {
            fs.Write(buffer, 0, read);
            written += read;
        }
    }

    File.SetLastWriteTime(path, touchDate);
    return written;
}

/// <summary>
/// Tells whether a Content-Encoding header value starts with "gzip" (case-insensitive).
/// </summary>
private bool IsGZipEncoding(string contentEncoding)
{
    // Ordinal comparison: ToLower() is culture-sensitive and turns "GZIP" into "gzıp"
    // under Turkish culture, which would defeat the check.
    return contentEncoding != null
        && contentEncoding.StartsWith("gzip", StringComparison.OrdinalIgnoreCase);
}

/// <summary>
/// Copies a cached document to <paramref name="path"/> and gives the copy the cache
/// file's last write time.
/// </summary>
private static void CopyCachedFile(string cachePath, string path)
{
    IOLibrary.CopyAlways(cachePath, path);
    // touch the file
    File.SetLastWriteTime(path, File.GetLastWriteTime(cachePath));
}

/// <summary>
/// Resolves an encoding by name, returning null instead of throwing when the name is
/// empty, unknown or unsupported.
/// </summary>
private static Encoding TryGetEncoding(string name)
{
    if (string.IsNullOrEmpty(name))
    {
        return null;
    }

    try
    {
        return Encoding.GetEncoding(name);
    }
    catch (ArgumentException)
    {
        return null;
    }
    catch (NotSupportedException)
    {
        return null;
    }
}

/// <summary>
/// Chooses the text encoding for a response body, or null to let the document detect it.
/// </summary>
private static Encoding GetResponseEncoding(HttpWebResponse resp, bool isGZipEncoding)
{
    // Legacy behaviour kept: a non-gzip Content-Encoding value is first tried as a
    // charset name. Values that are not charsets used to crash the request; they now
    // fall through to the Content-Type charset.
    if (!isGZipEncoding)
    {
        Encoding fromHeader = TryGetEncoding(resp.ContentEncoding);
        if (fromHeader != null)
        {
            return fromHeader;
        }
    }

    // charset taken from the Content-Type header
    string charset = resp.CharacterSet;
    if (string.IsNullOrEmpty(charset))
    {
        return null;
    }

    // NOTE(review): ISO-8859-1 is mapped to GB2312 exactly as in the previous code.
    // This looks like a locale-specific workaround - confirm it is still wanted.
    if (string.Equals(charset, "ISO-8859-1", StringComparison.OrdinalIgnoreCase))
    {
        return TryGetEncoding("GB2312");
    }

    return TryGetEncoding(charset);
}

/// <summary>
/// Executes the HTTP request and either caches the response on disk (cache enabled) or
/// loads it into <paramref name="doc"/> (cache disabled, HTML content only).
/// </summary>
/// <param name="uri">The http/https resource to request.</param>
/// <param name="method">The HTTP method.</param>
/// <param name="path">Optional file that receives a copy of the cached document; may be null.</param>
/// <param name="doc">Optional document loaded in memory when the cache is disabled; may be null.</param>
/// <param name="proxy">Optional proxy; may be null.</param>
/// <param name="creds">Optional credentials; default credentials are used with a proxy when null.</param>
/// <returns>
/// NotModified when the cached copy is used, ResetContent when the PreRequest handler
/// cancels the request, otherwise the status code of the response.
/// </returns>
private HttpStatusCode Get(Uri uri, string method, string path, HtmlDocument doc, IWebProxy proxy, ICredentials creds)
{
    string cachePath = null;
    bool oldFile = false;

    HttpWebRequest req = (HttpWebRequest)WebRequest.Create(uri);
    req.Method = method;
    req.UserAgent = UserAgent;

    if (proxy != null)
    {
        ICredentials effective = creds ?? CredentialCache.DefaultCredentials;
        proxy.Credentials = effective;
        req.Credentials = effective;
        req.Proxy = proxy;
    }

    _fromCache = false;
    _requestDuration = 0;
    int tc = Environment.TickCount;

    if (UsingCache)
    {
        cachePath = GetCachePath(req.RequestUri);
        if (File.Exists(cachePath))
        {
            // SaveStream stamps the cache file's last write time with the server's
            // Last-Modified, so that is the value to send back. The last access time
            // used previously changes whenever the file is merely read.
            req.IfModifiedSince = File.GetLastWriteTime(cachePath);
            oldFile = true;
        }
    }

    if (_cacheOnly)
    {
        if (!File.Exists(cachePath))
        {
            throw new HtmlWebException("File was not found at cache path: '" + cachePath + "'");
        }

        if (path != null)
        {
            CopyCachedFile(cachePath, path);
        }

        _fromCache = true;
        return HttpStatusCode.NotModified;
    }

    if (_useCookies)
    {
        req.CookieContainer = new CookieContainer();
    }

    // allow our user to change the request at will, or to cancel it
    if (PreRequest != null && !PreRequest(req))
    {
        return HttpStatusCode.ResetContent;
    }

    HttpWebResponse resp;
    try
    {
        resp = (HttpWebResponse)req.GetResponse();
    }
    catch (WebException we)
    {
        _requestDuration = Environment.TickCount - tc;
        resp = (HttpWebResponse)we.Response;
        if (resp == null)
        {
            // No response at all: fall back to the cached copy when there is one.
            if (oldFile)
            {
                if (path != null)
                {
                    CopyCachedFile(cachePath, path);
                }

                _fromCache = true;
                return HttpStatusCode.NotModified;
            }

            throw;
        }
    }
    catch (Exception)
    {
        _requestDuration = Environment.TickCount - tc;
        throw;
    }

    // From here on the response is always closed, including on the NotModified path and
    // when a handler or the parser throws.
    try
    {
        // allow our user to get some info from the response
        if (PostResponse != null)
        {
            PostResponse(req, resp);
        }

        _requestDuration = Environment.TickCount - tc;
        _responseUri = resp.ResponseUri;

        bool html = IsHtmlContent(resp.ContentType);
        bool isGZipEncoding = IsGZipEncoding(resp.ContentEncoding);

        if (resp.StatusCode == HttpStatusCode.NotModified)
        {
            if (UsingCache)
            {
                _fromCache = true;
                if (path != null)
                {
                    CopyCachedFile(cachePath, path);
                }

                return resp.StatusCode;
            }

            // this should *never* happen...
            throw new HtmlWebException("Server has send a NotModifed code, without cache enabled.");
        }

        Stream raw = resp.GetResponseStream();
        Stream s = (isGZipEncoding && raw != null)
            ? new GZipStream(raw, CompressionMode.Decompress)
            : raw;

        if (s != null)
        {
            using (s)
            {
                if (UsingCache)
                {
                    // NOTE: LastModified does not contain milliseconds, so we remove them to the file
                    SaveStream(s, cachePath, RemoveMilliseconds(resp.LastModified), _streamBufferSize);

                    // save headers
                    SaveCacheHeaders(req.RequestUri, resp);

                    if (path != null)
                    {
                        // copy and touch the file
                        CopyCachedFile(cachePath, path);
                    }

                    // NOTE(review): doc is not loaded on this path; a fresh download with
                    // the cache enabled only writes the cache file.
                }
                else if (doc != null && html)
                {
                    // try to work in-memory
                    Encoding respenc = GetResponseEncoding(resp, isGZipEncoding);
                    if (respenc != null)
                    {
                        doc.Load(s, respenc);
                    }
                    else
                    {
                        doc.Load(s, true);
                    }
                }

                // NOTE(review): with the cache disabled, path is never written.
            }
        }

        // Read the status before the finally block closes the response.
        return resp.StatusCode;
    }
    finally
    {
        resp.Close();
    }
}

/// <summary>
/// Reads one header value from the cached headers file of <paramref name="requestUri"/>.
/// </summary>
/// <param name="requestUri">The request whose cached headers are read.</param>
/// <param name="name">The header name, matched case-insensitively.</param>
/// <param name="def">The value returned when the header is not present.</param>
private string GetCacheHeader(Uri requestUri, string name, string def)
{
    // note: some headers are collection (ex: www-authenticate)
    // we don't handle that here
    XmlDocument doc = new XmlDocument();
    doc.Load(GetCacheHeadersPath(requestUri));

    // Entries are stored as <h n="name" v="value"/>. Matching in code avoids building
    // an XPath expression from an unescaped header name.
    foreach (XmlNode node in doc.SelectNodes("//h"))
    {
        XmlAttribute key = node.Attributes["n"];
        if (key == null || !string.Equals(key.Value, name, StringComparison.OrdinalIgnoreCase))
        {
            continue;
        }

        // The value lives in the "v" attribute. The previous code indexed the attributes
        // by the header name, which never exists as an attribute.
        XmlAttribute value = node.Attributes["v"];
        return value != null ? value.Value : def;
    }

    return def;
}

/// <summary>
/// Gets the path of the file that stores the cached response headers for a url.
/// </summary>
private string GetCacheHeadersPath(Uri uri)
{
    return GetCachePath(uri) + ".h.xml";
}

/// <summary>
/// Tells whether the file at <paramref name="path"/> is HTML, judging by its extension.
/// </summary>
private bool IsCacheHtmlContent(string path)
{
    string ct = GetContentTypeForExtension(Path.GetExtension(path), null);
    return IsHtmlContent(ct);
}

/// <summary>
/// Tells whether a Content-Type value starts with "text/html" (case-insensitive).
/// A null value is treated as not HTML.
/// </summary>
private bool IsHtmlContent(string contentType)
{
    return contentType != null
        && contentType.StartsWith("text/html", StringComparison.OrdinalIgnoreCase);
}

/// <summary>
/// Requests <paramref name="uri"/> and returns the resulting document, reading it from
/// the cache file when the request reports NotModified.
/// </summary>
private HtmlDocument LoadUrl(Uri uri, string method, WebProxy proxy, NetworkCredential creds)
{
    HtmlDocument doc = new HtmlDocument();
    doc.OptionAutoCloseOnEnd = false;
    doc.OptionFixNestedTags = true;

    _statusCode = Get(uri, method, null, doc, proxy, creds);
    if (_statusCode == HttpStatusCode.NotModified)
    {
        // read cached encoding
        doc.DetectEncodingAndLoad(GetCachePath(uri));
    }

    // NOTE(review): when the cache is enabled and the server returns fresh content, Get
    // only writes the cache file, so the document returned here has not been loaded.
    return doc;
}

/// <summary>
/// Writes the response headers next to the cached document as a small XML file with one
/// h element per header (n = name, v = value).
/// </summary>
private void SaveCacheHeaders(Uri requestUri, HttpWebResponse resp)
{
    // we cache the original headers aside the cached document.
    string file = GetCacheHeadersPath(requestUri);

    // Build the root element explicitly: loading an empty XML string throws, and the
    // document would have no node to append the entries to.
    XmlDocument doc = new XmlDocument();
    XmlElement cache = doc.CreateElement("c");
    doc.AppendChild(cache);

    foreach (string header in resp.Headers)
    {
        XmlElement entry = doc.CreateElement("h");
        entry.SetAttribute("n", header);
        entry.SetAttribute("v", resp.Headers[header]);
        cache.AppendChild(entry);
    }

    doc.Save(file);
}

#endregion
}
}