using FCS.Common;
using FCS.Interface;
using FCS.Models;
using FCS.Models.DTO;
using FCS.Models.Entity;
using HtmlAgilityPack;
using Quartz;
using System;
using System.Collections.Generic;
using System.Data;
using System.Diagnostics;
using System.Linq;
using System.Text;
using System.Threading;
using System.Threading.Tasks;

namespace FCS.Crawler.Basketball
{
    /// <summary>
    /// Basketball team crawl job: walks the team-statistics pages of every stored grouping,
    /// fetches each linked team detail page and bulk-inserts the teams whose name is not stored yet.
    /// </summary>
    /// <remarks>
    /// <c>log</c>, <c>services</c> and <c>Config</c> are not declared in this file; presumably they are
    /// inherited from <c>CommonJob</c> (verify against the base class).
    /// </remarks>
    public class B_TeamJob : CommonJob, IJob
    {
        /// <summary>
        /// Page suffixes that replace "saiAll.do" in a grouping URL to reach its team-statistics pages.
        /// </summary>
        private static readonly string[] StatisticsPages =
        {
            "1/teamStatistics.do",
            "2/teamStatistics.do",
            "3/teamStatistics.do"
        };

        /// <summary>
        /// Guards <see cref="TeamList"/>, which is filled from several worker threads at once.
        /// </summary>
        private static readonly object TeamListLock = new object();

        /// <summary>
        /// Teams already stored, reloaded at the start of every <see cref="GetAll"/> run.
        /// </summary>
        // NOTE(review): the element type is missing in this copy of the source; B_Team is assumed
        // because the items are compared with B_Team.Name below.
        private static List<B_Team> AllTeamList = new List<B_Team>();

        /// <summary>
        /// Teams collected by <see cref="GetTeamHtml"/>; drained by <see cref="GetAll"/> before inserting.
        /// </summary>
        private static List<B_Team> TeamList = new List<B_Team>();

        /// <summary>
        /// Creates the job and resolves its logger and data service.
        /// </summary>
        public B_TeamJob()
        {
            log = new LogHelper();
            // NOTE(review): this copy of the source shows Resolve() without its generic type argument;
            // restore the intended service type before compiling.
            services = IOC.Resolve();
        }

        /// <summary>
        /// Quartz entry point: reads the job configuration from the job data map and runs the crawl.
        /// </summary>
        /// <param name="context">Execution context supplied by the scheduler.</param>
        public void Execute(IJobExecutionContext context)
        {
            Config = CommonHelper.GetConfigFromDataMap(context.JobDetail.JobDataMap);
            GetAll();
        }

        /// <summary>
        /// Crawls every team-statistics page of every grouping, waits until all team pages have been
        /// processed, then bulk-inserts the collected teams whose name is neither stored already nor
        /// seen earlier in the same run.
        /// </summary>
        public void GetAll()
        {
            // NOTE(review): both Query calls appear without generic type arguments in this copy of the
            // source; restore them before compiling.
            var ds = services.Query("and Remark is not null", "", "distinct EventId,Season,Remark").ToList();
            AllTeamList = services.Query().ToList();

            // Flatten (grouping x statistics page) into one work list so all pages can be fetched in parallel.
            var pages = new List<KeyValuePair<string, string>>();
            foreach (var grouping in ds)
            {
                string remark = grouping.Remark.ToString();
                string eventId = grouping.EventId;
                foreach (string statisticsPage in StatisticsPages)
                {
                    pages.Add(new KeyValuePair<string, string>(remark.Replace("saiAll.do", statisticsPage), eventId));
                }
            }

            // Parallel.ForEach returns only after every page, and every team page below it, is finished.
            // The previous fire-and-forget tasks let the insert below start while crawling was still running.
            Parallel.ForEach(pages, page => CrawlStatisticsPage(page.Key, page.Value));

            Trace.WriteLine("获取B_Team 完结");

            // Take the collected teams out of the shared list so it does not grow from run to run.
            List<B_Team> crawled;
            lock (TeamListLock)
            {
                crawled = new List<B_Team>(TeamList);
                TeamList.Clear();
            }

            // A team is new when its name is neither stored nor already queued for insertion.
            var knownNames = new HashSet<string>(AllTeamList.Select(t => t.Name));
            var addList = new List<B_Team>();
            foreach (var team in crawled)
            {
                if (knownNames.Add(team.Name))
                {
                    addList.Add(team);
                }
            }

            services.SqlBulkCopyAdd(addList);
        }

        /// <summary>
        /// Loads one team-statistics page and processes every team link found on it.
        /// Failures are traced and skipped so a single bad page or team does not abort the whole crawl.
        /// </summary>
        /// <param name="pageUrl">URL of the team-statistics page.</param>
        /// <param name="eventId">Event id of the grouping the page belongs to.</param>
        private void CrawlStatisticsPage(string pageUrl, string eventId)
        {
            try
            {
                HtmlDocument doc = CommonHelper.GetHtml(pageUrl);
                if (doc == null)
                {
                    return;
                }

                // Anchor elements that link to the individual team pages; SelectNodes yields null when none match.
                var teamLinks = doc.DocumentNode.SelectNodes("//*[@class='paiming']/table/tr/td/a");
                if (teamLinks == null)
                {
                    return;
                }

                Parallel.ForEach(teamLinks, link =>
                {
                    try
                    {
                        var href = link.Attributes.FirstOrDefault(a => a.Name.Equals("href"));
                        if (href == null)
                        {
                            return; // a link without a target cannot be followed
                        }

                        var teamName = link.InnerText.Trim().Replace("\n", "").Replace("\r", "").Replace("\t", "");
                        GetTeamHtml(href.Value, teamName, eventId);
                    }
                    catch (Exception ex)
                    {
                        Trace.WriteLine("B_TeamJob: failed to process a team link on " + pageUrl + ": " + ex);
                    }
                });
            }
            catch (Exception ex)
            {
                Trace.WriteLine("B_TeamJob: failed to process statistics page " + pageUrl + ": " + ex);
            }
        }

        /// <summary>
        /// Downloads and parses a team detail page and appends the resulting team to the shared result list.
        /// When the page is unavailable or reports that the team has no data, a team carrying only
        /// id, name and event id is recorded instead.
        /// </summary>
        /// <param name="url">URL of the team detail page.</param>
        /// <param name="TeamName">Team name taken from the link text.</param>
        /// <param name="eventId">Event id the team was found under.</param>
        /// <returns>
        /// Always <c>false</c>. NOTE(review): both paths returned false in the original as well; this is
        /// kept for compatibility, confirm whether <c>true</c> was intended for a fully parsed team.
        /// </returns>
        public bool GetTeamHtml(string url, string TeamName, string eventId)
        {
            HtmlDocument doc = CommonHelper.GetHtmlHtmlDocument(new HtmlParameterDTO { Url = url, Title = "球队详情" });

            var team = new B_Team();
            team.Id = Guid.NewGuid().ToString();
            team.Name = TeamName;
            team.EventId = eventId;

            if (doc == null || doc.DocumentNode.InnerText.Contains("球队还没有资料"))
            {
                AddTeam(team);
                return false;
            }

            // Team logo image and the basic team facts.
            var teamImg = doc.DocumentNode.SelectNodes("//*[@class='xq_img']/img");
            var teamData = doc.DocumentNode.SelectNodes("//*[@class='div_qdxq']/span/em");

            team.Remark = url;
            team.Describe = "";

            // SelectNodes yields null when the page has no logo; LogoImage is then left unset,
            // just as it is for teams without a detail page.
            if (teamImg != null)
            {
                var logo = teamImg.FirstOrDefault();
                var src = logo == null ? null : logo.Attributes.FirstOrDefault(a => a.Name.Equals("src"));
                if (src != null)
                {
                    team.LogoImage = src.Value;
                }
            }

            if (teamData != null)
            {
                List<HtmlNode> dataList = teamData.ToList();
                // NOTE(review): each threshold is one larger than strictly needed for its index
                // (e.g. Count > 8 for index 7); kept unchanged, confirm against the page layout.
                team.City = TextWhenCountExceeds(dataList, 8, 7);
                team.Year = TextWhenCountExceeds(dataList, 14, 13);
                team.Venues = TextWhenCountExceeds(dataList, 16, 15);
                team.Coach = TextWhenCountExceeds(dataList, 12, 11);
            }

            AddTeam(team);
            return false;
        }

        /// <summary>
        /// Appends a team to the shared result list under the list lock.
        /// </summary>
        private static void AddTeam(B_Team team)
        {
            lock (TeamListLock)
            {
                TeamList.Add(team);
            }
        }

        /// <summary>
        /// Returns the inner text of <c>nodes[index]</c> when the list holds more than
        /// <paramref name="threshold"/> nodes, otherwise an empty string.
        /// </summary>
        private static string TextWhenCountExceeds(List<HtmlNode> nodes, int threshold, int index)
        {
            return nodes.Count > threshold ? nodes[index].InnerText : "";
        }

        #region SQL statements
        /// <summary>
        /// Query for the grouping rows. Not referenced anywhere in this class.
        /// </summary>
        private static string GetAllGroupingUrl = @"select distinct EventId,Season,Remark from F_Grouping where Remark is not null";
        #endregion
    }
}