using FCS.Common;
using FCS.Interface;
using FCS.Models;
using FCS.Models.DTO;
using FCS.Models.Entity;
using HtmlAgilityPack;
using Quartz;
using System;
using System.Collections.Generic;
using System.Data;
using System.Diagnostics;
using System.Linq;
using System.Text;
using System.Threading;
using System.Threading.Tasks;

namespace FCS.Crawler.Basketball
{
    /// <summary>
    /// Quartz job that crawls basketball team pages and bulk-inserts the teams whose
    /// name is not yet present in the list returned by <c>services.Query()</c>.
    /// </summary>
    /// <remarks>
    /// <c>log</c>, <c>services</c> and <c>Config</c> are not declared in this class;
    /// presumably they are inherited from <c>CommonJob</c> (verify against the base class).
    /// </remarks>
    public class B_TeamJob : CommonJob, IJob
    {
        // NOTE(review): generic type arguments appear to have been stripped from the copy of
        // this file that was reviewed. List<B_Team> is restored here because B_Team instances
        // are added to TeamList and compared by Name against AllTeamList - confirm.
        private static List<B_Team> AllTeamList = new List<B_Team>();

        // Teams collected by the current crawl. Written from several worker threads, so every
        // access must hold TeamListLock (List<T> is not safe for concurrent writes).
        private static List<B_Team> TeamList = new List<B_Team>();
        private static readonly object TeamListLock = new object();

        /// <summary>
        /// Creates the job, its log helper and its data service.
        /// </summary>
        public B_TeamJob()
        {
            log = new LogHelper();
            // NOTE(review): the type argument of IOC.Resolve is not visible in the reviewed
            // copy; restore the original service interface here.
            services = IOC.Resolve();
        }

        /// <summary>
        /// Quartz entry point: loads the job configuration from the job data map and runs the crawl.
        /// </summary>
        /// <param name="context">Execution context supplied by the scheduler.</param>
        public void Execute(IJobExecutionContext context)
        {
            Config = CommonHelper.GetConfigFromDataMap(context.JobDetail.JobDataMap);
            GetAll();
        }

        /// <summary>
        /// Crawls the three team-statistics pages of every grouping, collects the teams linked
        /// from them, and bulk-inserts those whose name is neither already known nor already
        /// queued for insertion.
        /// </summary>
        /// <remarks>
        /// Blocks until all crawl work has finished. A failure while crawling one grouping,
        /// page or team is traced and does not stop the rest of the crawl.
        /// </remarks>
        public void GetAll()
        {
            // Process-wide thread-pool limits, kept from the original implementation.
            ThreadPool.SetMinThreads(10, 10);
            ThreadPool.SetMaxThreads(200, 200);

            // NOTE(review): the type arguments of both Query calls are not visible in the
            // reviewed copy; restore the original entity types here.
            var groupings = services.Query("and Remark is not null", "", "distinct EventId,Season,Remark").ToList();
            AllTeamList = services.Query().ToList();

            // The list is static, so drop whatever an earlier run left behind.
            lock (TeamListLock)
            {
                TeamList.Clear();
            }

            // Parallel.ForEach returns only after every iteration has completed, which replaces
            // the former polling of ThreadPool.GetAvailableThreads: that check could report
            // "finished" before queued work had started, and could hang while unrelated code
            // kept pool threads busy.
            Parallel.ForEach(groupings, grouping => RunBestEffort("grouping crawl", () =>
            {
                var eventId = grouping.EventId;
                var remark = grouping.Remark.ToString();

                // Each grouping URL maps to three statistics pages (1/, 2/ and 3/).
                var statisticsUrls = new List<string>
                {
                    remark.Replace("saiAll.do", "1/teamStatistics.do"),
                    remark.Replace("saiAll.do", "2/teamStatistics.do"),
                    remark.Replace("saiAll.do", "3/teamStatistics.do")
                };

                Parallel.ForEach(statisticsUrls, statisticsUrl =>
                    RunBestEffort("statistics page " + statisticsUrl,
                        () => CrawlStatisticsPage(statisticsUrl, eventId)));
            }));

            Console.WriteLine("Thread Finished!");

            List<B_Team> crawledTeams;
            lock (TeamListLock)
            {
                crawledTeams = new List<B_Team>(TeamList);
            }

            // A team is inserted only when its name is not already known and has not been
            // queued earlier in this run. HashSet.Add returns false for a name it already
            // holds, which covers both conditions in one lookup.
            var seenNames = new HashSet<string>(AllTeamList.Select(o => o.Name));
            var addList = new List<B_Team>();
            foreach (var team in crawledTeams)
            {
                if (seenNames.Add(team.Name))
                {
                    addList.Add(team);
                }
            }

            services.SqlBulkCopyAdd(addList);
        }

        /// <summary>
        /// Downloads one team page and records the team in the shared result list.
        /// </summary>
        /// <param name="url">Address of the team page.</param>
        /// <param name="TeamName">Team name taken from the statistics page link.</param>
        /// <param name="eventId">Identifier of the event the team was found under.</param>
        /// <returns>
        /// Always <c>false</c>, as in the original implementation.
        /// NOTE(review): the success path looks like it was meant to return <c>true</c>;
        /// confirm with callers before changing it.
        /// </returns>
        public bool GetTeamHtml(string url, string TeamName, string eventId)
        {
            HtmlDocument doc = CommonHelper.GetHtmlHtmlDocument(new HtmlParameterDTO { Url = url, Title = "球队详情" });

            B_Team team = new B_Team();
            team.Id = Guid.NewGuid().ToString();
            team.Name = TeamName;
            team.EventId = eventId;

            // No page, or the page carries the "no data yet" marker text: record only the
            // identifying fields.
            if (doc == null || doc.DocumentNode.InnerText.Contains("球队还没有资料"))
            {
                AddCrawledTeam(team);
                return false;
            }

            // Team logo image(s).
            var teamImg = doc.DocumentNode.SelectNodes("//*[@class='xq_img']/img");
            // Basic team information fields.
            var teamData = doc.DocumentNode.SelectNodes("//*[@class='div_qdxq']/span/em");

            team.Remark = url;

            // SelectNodes yields null when nothing matches; a page without a logo must not
            // discard the whole team, so the logo is simply left unset in that case.
            HtmlNode logoNode = teamImg == null ? null : teamImg.FirstOrDefault();
            team.LogoImage = logoNode == null ? null : GetAttributeValueOrNull(logoNode, "src");
            team.Describe = "";

            if (teamData != null)
            {
                var details = teamData.ToList();

                // The count guards are kept exactly as in the original; each requires one more
                // element than the index itself needs.
                // NOTE(review): the positions presumably follow the page's label/value layout - confirm.
                team.City = details.Count > 8 ? details[7].InnerText : "";
                team.Year = details.Count > 14 ? details[13].InnerText : "";
                team.Venues = details.Count > 16 ? details[15].InnerText : "";
                team.Coach = details.Count > 12 ? details[11].InnerText : "";
            }

            AddCrawledTeam(team);
            return false;
        }

        /// <summary>
        /// Downloads one statistics page and crawls every team linked from its ranking table.
        /// </summary>
        private void CrawlStatisticsPage(string statisticsUrl, string eventId)
        {
            HtmlDocument doc = CommonHelper.GetHtml(statisticsUrl);
            if (doc == null)
            {
                return;
            }

            // Anchor elements that link to the team pages.
            var teamLinks = doc.DocumentNode.SelectNodes("//*[@class='paiming']/table/tr/td/a");
            if (teamLinks == null)
            {
                return;
            }

            // Read the DOM on this thread only; just the page downloads run in parallel.
            var teams = new List<KeyValuePair<string, string>>();
            foreach (var link in teamLinks)
            {
                var teamUrl = GetAttributeValueOrNull(link, "href");
                if (teamUrl == null)
                {
                    continue; // an anchor without href has nothing to crawl
                }

                var teamName = link.InnerText.Trim().Replace("\n", "").Replace("\r", "").Replace("\t", "");
                teams.Add(new KeyValuePair<string, string>(teamUrl, teamName));
            }

            Parallel.ForEach(teams, team =>
                RunBestEffort("team page " + team.Key,
                    () => GetTeamHtml(team.Key, team.Value, eventId)));
        }

        /// <summary>
        /// Returns the value of the named attribute, or <c>null</c> when the node does not have it.
        /// </summary>
        private static string GetAttributeValueOrNull(HtmlNode node, string attributeName)
        {
            var attribute = node.Attributes.SingleOrDefault(a => a.Name.Equals(attributeName));
            return attribute == null ? null : attribute.Value;
        }

        /// <summary>
        /// Appends a team to the shared result list under the list lock.
        /// </summary>
        private static void AddCrawledTeam(B_Team team)
        {
            lock (TeamListLock)
            {
                TeamList.Add(team);
            }
        }

        /// <summary>
        /// Runs one unit of crawl work and traces any exception instead of letting it
        /// abort the remaining work.
        /// </summary>
        private static void RunBestEffort(string description, Action work)
        {
            try
            {
                work();
            }
            catch (Exception ex)
            {
                // The LogHelper API is not visible in this file, so failures go to Trace.
                Trace.WriteLine("B_TeamJob: " + description + " failed: " + ex);
            }
        }

        #region SQL statements

        /// <summary>
        /// Query for the distinct grouping rows that have a URL in Remark.
        /// Not referenced anywhere in this class.
        /// </summary>
        private static string GetAllGroupingUrl = @"select distinct EventId,Season,Remark from F_Grouping where Remark is not null";

        #endregion
    }
}