猎凡网络承接仿站、网站定制业务,诚邀您的合作! 猎凡网为你提供:dedecms织梦仿站帝国cms仿站网站定制、企业仿站等服务,欢迎您的访问!
<
>
当前位置: 首页 > 仿站知识

一步步教你如何打造一个网站克隆工具仿站

时间:2019-09-16 09:00:03  来源:admin  作者:猎凡网络

前两天朋友叫我模仿一个网站,刚刚开始,我一个页面一个页面查看源码并复制和保存,花了我很多时间,一个字“累”,为了减轻工作量,我写了个网站“克隆工具”,一键克隆,比起人工操作,
效率提高了200%以上,精确度也大大提高,下面我将我写的“网站克隆工具”实现方法分享给大家。

一睹为快,先看看界面:

一步步教你如何打造一个网站克隆工具<a href=https://fangzhan.seobr.com/ target=_blank class=infotextkey>仿站</a>

开发工具:vs2012(winform)

1.新建UrlModel模型

public class UrlModel    { public string RelatedPath { get; set; } public string AbsoluteUri { get; set; } public string CurrPath { get; set; } public string RootPath { get; set; } public string Host { get; set; } public int Port { get; set; } public string Scheme { get; set; }    }

2.新建UrlParser解析器

public class UrlParser    { public static UrlModel Parse(string url)        {            UrlModel model = new UrlModel(); //默认 if (url.Length < 8) throw new Exception("url参数不正确"); else if (!url.ToLower().StartsWith("http:") && !url.ToLower().StartsWith("https:")) throw new Exception("url格式有误"); if (url.LastIndexOf('/') < 8)                url = url + "/";            Regex reg = new Regex("(?<scheme>(http|https))://(?<host>.+?)/", RegexOptions.Singleline); if (reg.IsMatch(url))            { string scheme = reg.Match(url).Groups["scheme"].Value; string host = reg.Match(url).Groups["host"].Value; if (host.Contains(":"))                { var aa = host.Split(':'); if (aa.Length == 2)                    {                        model.Host = aa[0];                        model.Port = int.Parse(aa[1]);                    }                } else {                    model.Host = host;                    model.Port = 80;                } int index = url.IndexOf('/', 8);                model.RelatedPath = url.Substring(index);                model.AbsoluteUri = url;                model.Scheme = scheme;                model.CurrPath = url.Substring(0, url.LastIndexOf("/")); if (80 == model.Port)                {                    model.RootPath = string.Format("{0}://{1}", model.Scheme, model.Host);                } else {                    model.RootPath = string.Format("{0}://{1}:{2", model.Scheme, model.Host, model.Port);                }            } else { throw new Exception("url解析失败!");            } return model;        }    }

3.网页处理服务工具

/// <summary> /// 网页处理服务工具 /// </summary> public class WebPageService    { private static string[] excludekeys = { "http:", "https:", "//", "#", "javascript:", "?", "tel:", "mailto:" }; /// <summary> /// 获取所有html元素的href属性值,只获取站点本地的链接,站外的不获取 /// </summary> /// <param name="html">页面的html源码</param> /// <returns></returns> public static List<UrlModel> GetLocalHrefs(string url,string html)        { if (string.IsNullOrEmpty(html)) return new List<UrlModel>();            Dictionary<string, UrlModel> urls = GetHrefs(url,html);            List<UrlModel> newUrls = new List<UrlModel>(); if (null != urls)            { foreach (string key in urls.Keys)                { string newkey = key.ToLower(); bool iscontained = false; foreach (var exkey in excludekeys)                    { if (newkey.IndexOf(exkey) == 0)                        {                            iscontained = true; break;                        }                    } if (!iscontained) { //只获取本地路径  newUrls.Add(urls[key]);                    }                }            } return newUrls;        } /// <summary> /// 获取所有html元素的src属性值,只获取站点本地的链接,站外的不获取 /// </summary> /// <param name="html">页面的html源码</param> /// <returns></returns> public static List<UrlModel> GetLocalSrcs(string url,string html)        { if (string.IsNullOrEmpty(html)) return new List<UrlModel>();            Dictionary<string, UrlModel> urls = GetSrc(url, html);            List<UrlModel> newUrls = new List<UrlModel>(); if (null != urls)            { foreach (string key in urls.Keys)                { string newkey = key.ToLower(); bool iscontained = false; foreach (var exkey in excludekeys)                    { if (newkey.IndexOf(exkey) == 0)                        {                            iscontained = true; break;                        }                    } if (!iscontained)                    { //只获取本地路径  newUrls.Add(urls[key]);                    }                }            } return newUrls;        } private static Dictionary<string, UrlModel> GetHrefs(string url,string html)        { if (string.IsNullOrEmpty(html)) return null;            UrlModel currUrl = UrlParser.Parse(url);            Dictionary<string, UrlModel> urls = new Dictionary<string, UrlModel>();            Regex reg = new Regex("href=\"(?<Url>.+?)\"", RegexOptions.IgnoreCase); if (currUrl != null)            {                AddUrlModel(html, currUrl, urls, reg);            } return urls;        } private static Dictionary<string, UrlModel> GetSrc(string url,string html)        { if (string.IsNullOrEmpty(html)) return null;            UrlModel currUrl = UrlParser.Parse(url);            Dictionary<string, UrlModel> urls = new Dictionary<string, UrlModel>();            Regex reg = new Regex("(src=\"(?<Url>.+?)\"|url\\((?<Url>.+?)\\))", RegexOptions.IgnoreCase); if (currUrl != null)            {                AddUrlModel(html, currUrl, urls, reg);            } return urls;        } private static void AddUrlModel(string html, UrlModel currUrl, Dictionary<string, UrlModel> urls, Regex reg)        { if (reg.IsMatch(html))            {                MatchCollection matchs = reg.Matches(html); foreach (Match item in matchs)                { try { string strUrl = item.Groups["Url"].Value;                        UrlModel model = new UrlModel();                        model.RelatedPath = strUrl;                        model.CurrPath = currUrl.CurrPath;                        model.RootPath = currUrl.RootPath;                        model.Scheme = currUrl.Scheme;                        model.Port = currUrl.Port;                        model.Host = currUrl.Host; if (strUrl.StartsWith("/"))                        { //绝对目录情况下 model.AbsoluteUri = string.Format("{0}{1}", model.RootPath, model.RelatedPath);                        } else { //相对目录情况下 string currPath = model.CurrPath; int depth = 0; string path = model.RelatedPath; if (path.StartsWith(".."))                            { try { while (path.StartsWith(".."))                                    {                                        depth++;                                        path = path.Substring(3);                                        currPath = currPath.Substring(0, currPath.LastIndexOf("/"));                                    }                                    model.AbsoluteUri = string.Format("{0}/{1}", currPath, path);                                } catch {                                }                            } else {                                model.AbsoluteUri = string.Format("{0}/{1}", currPath, path);                            }                        }                        strUrl = strUrl.Trim().ToLower();                        urls.Add(strUrl, model);                    } catch {                    }                }            }        }    }

4.新建网站克隆接口

interface IWebCloneWorker    { void Start(); void Cancel();    }

5.新建实现

public class WebCloneWorker : IWebCloneWorker    { //网站页面克隆深度(如:0-首页,1-分类页,2-详细页面) public static int depth = 0; //要克隆的网站网址 public string Url { get; set; } //克隆后,保存的路径 public string SavePath { get; set; } private BackgroundWorker backgroundWorker1 = null; public event UrlChangedEventHandler UrlChanged; public event FileSavedSuccessEventHandler FileSavedSuccess; public event FileSavedFailEventHandler FileSavedFail; public event DownloadCompletedEventHandler DownloadCompleted; public event CollectingUrlEventHandler CollectingUrl; public event CollectedUrlEventHandler CollectedUrl; public event ProgressChangedEventHandler ProgressChanged; //所有页面、文件资源地址集合 private Dictionary<string, UrlModel> _Hrefs = new Dictionary<string, UrlModel>(); /// <summary> /// 所有页面、文件资源地址集合 /// </summary> public Dictionary<string,UrlModel> Hrefs        { get { return _Hrefs; } set { _Hrefs = value; }        } //网站页面请求编码,默认为UTF-8 private string _Encoding = "utf-8"; //网站页面请求编码,默认为UTF-8 public string Encoding        { get { return _Encoding; } set { _Encoding = value; }        } public WebCloneWorker() { } public WebCloneWorker(string url,string path)         { //设置网站、保存路径 this.Url = url; this.SavePath = path; if (string.IsNullOrEmpty(this.Url)) throw new Exception("请输入网址"); if (string.IsNullOrEmpty(this.SavePath)) throw new Exception("请选择要保存的目录");            backgroundWorker1 = new BackgroundWorker(); //设置报告进度更新 backgroundWorker1.WorkerReportsProgress = true;            backgroundWorker1.WorkerSupportsCancellation = true; //注册线程主体方法 backgroundWorker1.DoWork += backgroundWorker1_DoWork; //注册更新UI方法 backgroundWorker1.ProgressChanged += backgroundWorker1_ProgressChanged; //处理完毕 backgroundWorker1.RunWorkerCompleted += backgroundWorker1_RunWorkerCompleted;        } void backgroundWorker1_RunWorkerCompleted(object sender, RunWorkerCompletedEventArgs e)        { if (e.Cancelled) { return;            } if (this.DownloadCompleted != null)            {                DownloadCompletedEventArgs eventArgs = new DownloadCompletedEventArgs(e.Result, e.Error, e.Cancelled); this.DownloadCompleted(this, eventArgs);            }        } void backgroundWorker1_ProgressChanged(object sender, ProgressChangedEventArgs e)        { //进度回调 if (this.ProgressChanged != null) this.ProgressChanged(this, e);            UrlModel model = (UrlModel)e.UserState; if (this.UrlChanged != null)            { //Url改变后,回调 UrlChangedEventArgs eventArgs = new UrlChangedEventArgs(model); this.UrlChanged(this, eventArgs);            } try { string dir = this.SavePath; string url = model.AbsoluteUri; string AbsolutePath = url.Substring(url.IndexOf('/', 8)); string fileName = ""; if (url.IndexOf('?') > 0)                { string path = AbsolutePath.Substring(0, model.RelatedPath.IndexOf('?'));                    fileName = System.IO.Path.GetFileName(path);                } else {                    fileName = System.IO.Path.GetFileName(AbsolutePath);                } //默认首页 if (string.IsNullOrEmpty(fileName) || fileName.IndexOf(".") < 0)                {                    fileName = "index.html"; if (!AbsolutePath.EndsWith("/"))                        AbsolutePath = AbsolutePath + "/";                }                fileName = System.Web.HttpUtility.UrlDecode(fileName); string localPath = string.Format("{0}{1}", dir, System.IO.Path.GetDirectoryName(AbsolutePath)); if (!System.IO.Directory.Exists(localPath))                {                    System.IO.Directory.CreateDirectory(localPath);                } //判断文件是否存在,存在不再下载 string path2 = Path.Combine(localPath, fileName); if (File.Exists(path2))                { return;                } //下载网页、图片、资源文件  HttpTool.DownFile(url, localPath, fileName); //保存成功后,回调 if (this.FileSavedSuccess != null)                {                    FileSavedSuccessEventArgs eventArgs = new FileSavedSuccessEventArgs(model); this.FileSavedSuccess(this, eventArgs);                }            } catch (Exception ex)            { //保存失败后,回调 if (this.FileSavedFail != null)                {                    FileSavedFailEventArgs eventArgs = new FileSavedFailEventArgs(ex); this.FileSavedFail(this, eventArgs);                }            }        } void backgroundWorker1_DoWork(object sender, DoWorkEventArgs e)        { //获取资源  GetResource(); int index = 1; if (this.Hrefs.Keys.Count > 0)            { foreach (var k in this.Hrefs.Keys)                { //取消操作 if (backgroundWorker1.CancellationPending)                    {                        e.Cancel = true; return;                    }                    backgroundWorker1.ReportProgress(index, this.Hrefs[k]);                    index++; //挂起当前线程200毫秒 Thread.Sleep(200);                }            }        } public void Start()        { if (this.backgroundWorker1.IsBusy) return; this.backgroundWorker1.RunWorkerAsync();        } public void Cancel()        { if (this.backgroundWorker1.CancellationPending) return; this.backgroundWorker1.CancelAsync();        } private void GetResource()        { string url = this.Url; string referer = this.Url; string msg = ""; string html = HttpTool.HttpGet(url, referer, this.Encoding, out msg); //收集页面链接 GetHrefs(0, url, html); //收集完毕 if (null != CollectedUrl)            {                UrlModel urlModel = new UrlModel();                CollectedUrlEventArgs eventArgs = new CollectedUrlEventArgs(urlModel); this.CollectedUrl(this, eventArgs);            }        } private void GetHrefs(int level,string url,string html)        { #region 添加当前页 UrlModel currUrl = UrlParser.Parse(url); try { //取消 if (backgroundWorker1.CancellationPending) return; this.Hrefs.Add(currUrl.RelatedPath, currUrl); //收集回调 if (null != CollectingUrl)                {                    CollectingUrlEventArgs eventArgs = new CollectingUrlEventArgs(currUrl); this.CollectingUrl(this, eventArgs);                }            } catch {            } #endregion //获取相关链接(含有href属性的) List<UrlModel> list1 = WebPageService.GetLocalHrefs(url,html); //获取图片,文件等资源文件(含有src属性的) List<UrlModel> listSrcs = WebPageService.GetLocalSrcs(url,html); #region 获取当级资源文件 if (listSrcs != null)            { for (int i = 0; i < listSrcs.Count; i++)                {                    UrlModel urlModel = listSrcs[i]; try { //取消 if (backgroundWorker1.CancellationPending) return; this.Hrefs.Add(urlModel.RelatedPath, urlModel); //收集回调 if (null != CollectingUrl)                        {                            CollectingUrlEventArgs eventArgs = new CollectingUrlEventArgs(urlModel); this.CollectingUrl(this, eventArgs);                        }                    } catch { }                }            } #endregion #region 获取子级页面资源 //获取第二级 if (list1 != null)            { for (int i = 0; i < list1.Count; i++)                {                    UrlModel urlModel = list1[i]; try { //取消 if (backgroundWorker1.CancellationPending) return; this.Hrefs.Add(urlModel.RelatedPath, urlModel); //收集回调 if (null != CollectingUrl)                        {                            CollectingUrlEventArgs eventArgs = new CollectingUrlEventArgs(urlModel); this.CollectingUrl(this, eventArgs);                        }                    } catch { } string msg = "";                    html = HttpTool.HttpGet(urlModel.AbsoluteUri, urlModel.AbsoluteUri, this.Encoding, out msg); #region 获取子级资源文件 /* * 获取二级资源文件                     * */ listSrcs = WebPageService.GetLocalSrcs(urlModel.AbsoluteUri, html);//资源文件 if (listSrcs != null)                    { for (int j = 0; j < listSrcs.Count; j++)                        {                            UrlModel urlModel2 = listSrcs[j]; try { //取消 if (backgroundWorker1.CancellationPending) return; this.Hrefs.Add(urlModel2.RelatedPath, urlModel2); //收集回调 if (null != CollectingUrl)                                {                                    CollectingUrlEventArgs eventArgs = new CollectingUrlEventArgs(urlModel2); this.CollectingUrl(this, eventArgs);                                }                            } catch { } //挂起线程20毫秒 Thread.Sleep(20);                        }                    } #endregion //挂起线程20毫秒 Thread.Sleep(20); //到达指定深度后,退出 if (level >= depth) return; //递归 GetHrefs(level + 1, urlModel.AbsoluteUri, html);                }            } #endregion }

6.代码有点多,各位有需要的还是下载源码查看并运行吧。

百度网盘:链接:https://pan.baidu.com/s/1hja1rl9UEcl0dzTqVFt0dg 密码:7s6r

<

本文网址:https://fangzhan.seobr.com/news/186.html
本文由猎凡仿站网发布,如果您还想了解更多关于网站仿制的文章,请点击查看 仿站知识 的其它文章,转发请注明来源及版权归属。
承接以下业务:传统网站改自适应网站、网站增加手机站、网站改全屏网站改大屏展示、GBK转UFT-8、DEDECMS转WORDPRESS、网站站群制作、企业网站设计、企业网站优化、网站托管等业务

标签:页面   回调
相关文章

推荐资讯
热点标签

数年专注于仿站建站,擅长dedecms仿站、WORDPRESS仿站、帝国cms仿站、网站改版等业务! Copyright©2019 猎凡仿站网 All Rights Reserved