123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154 |
- using System;
- using System.Collections.Generic;
- using System.Configuration;
- using System.Linq;
- using System.Text;
- using System.Threading.Tasks;
- using FCS.Crawler.ZCLotteryMatchs;
- using FCS.Interface;
- using FCS.Common;
- using FCS.Models.Entity;
- using HtmlAgilityPack;
- using System.Threading;
namespace FCS.Crawler.Basketball
{
    /// <summary>
    /// Scrapes the NBA grouping ("saiAll") pages of lanqiu.zgzcw.com and bulk-inserts
    /// the parent/child groups that are not yet stored for the configured NBA event.
    /// </summary>
    public class B_GroupingJob : CommonJob
    {
        // eventId is assigned at the start of GetNBA and then read by GetModel and
        // GetNBASeason. The url field is not read anywhere in this class.
        private string eventId = "", url = "";

        /// <summary>
        /// Job entry point; runs the NBA grouping crawl.
        /// </summary>
        public void Click()
        {
            GetNBA();
        }

        /// <summary>
        /// Loads the grouping page of every season returned by <see cref="GetNBASeason"/>,
        /// extracts the parent groups and their child groups, and bulk-inserts the rows
        /// that are missing from the database.
        /// </summary>
        /// <exception cref="InvalidOperationException">
        /// No event matches the "NBAEventName" application setting.
        /// </exception>
        public void GetNBA()
        {
            var now = DateTime.Now;
            // Season segment of the URL is "<last year yy>-<this year yy>".
            var mainUrl = "http://lanqiu.zgzcw.com/1/{0}/saiAll.do".FormatMe(
                now.AddYears(-1).Year.ToString().Substring(2) + "-" + now.Year.ToString().Substring(2));

            var eventName = ConfigurationManager.AppSettings["NBAEventName"];
            var matchingEvents = new B_EventsJob().GetEventList().Where(p => p.Name == eventName).ToList();
            if (matchingEvents.Count == 0)
            {
                throw new InvalidOperationException(
                    "No event found for the NBAEventName setting '" + eventName + "'.");
            }
            // Must be set before GetNBASeason/GetModel run: both read this field.
            eventId = matchingEvents[0].Id;

            var mainRoot = CommonHelper.GetHtml(mainUrl, "足彩").DocumentNode; // load the main page
            var seasons = GetNBASeason(mainRoot).ToList();

            var result = new List<B_Grouping>();
            var resultGate = new object(); // List<T> is not safe for concurrent writers

            foreach (var season in seasons)
            {
                var current = season;
                taskList.Add(Task.Run(() =>
                {
                    // Each task works on its own document and its own row buffer;
                    // only the final merge touches shared state.
                    var rows = ScrapeSeason(current, mainUrl, mainRoot);
                    lock (resultGate)
                    {
                        result.AddRange(rows);
                    }
                }));
            }
            Task.WaitAll(taskList.ToArray());

            // Materialise once so the stored rows are not re-queried by every LINQ pass below.
            var stored = services.Query<B_Grouping>(" AND EventId='{0}'".FormatMe(eventId)).ToList();
            var data = SelectRowsToInsert(result, stored);
            services.SqlBulkCopyAdd(data);
        }

        /// <summary>
        /// Extracts the parent groups and child groups of one season page.
        /// </summary>
        /// <param name="season">Season descriptor; Remark holds the page URL.</param>
        /// <param name="mainUrl">URL of the already loaded main page.</param>
        /// <param name="mainRoot">Root node of the already loaded main page.</param>
        /// <returns>Parent rows, each followed by its child rows; empty when the page has no groups.</returns>
        private List<B_Grouping> ScrapeSeason(B_Grouping season, string mainUrl, HtmlNode mainRoot)
        {
            var rows = new List<B_Grouping>();

            // Reuse the main page when it is this season's page, otherwise load the season page.
            var root = season.Remark == mainUrl
                ? mainRoot
                : CommonHelper.GetHtml(season.Remark, "足彩").DocumentNode;

            // HtmlAgilityPack returns null (not an empty collection) when nothing matches.
            var container = root.SelectSingleNode(".//div[@attr='saiAll']");
            var parentSpans = container == null ? null : container.SelectNodes("./span");
            if (parentSpans == null)
            {
                return rows;
            }

            var parentSort = 0;
            foreach (var span in parentSpans)
            {
                parentSort++;

                // The child container id is the span id with "Title" removed and "s" appended.
                var childContainerId = span.Attributes["Id"].Value.Replace("Title", "") + "s";
                var childContainer = root.SelectSingleNode(".//div[@id='{0}']".FormatMe(childContainerId));
                var childDiv = childContainer == null
                    ? null
                    : childContainer.SelectSingleNode(".//div[@class='xq']");

                var parentId = CommonHelper.GetGuid().ToString();
                rows.Add(GetModel(new B_Grouping
                {
                    Id = parentId,
                    Name = span.SelectNodes("./a")[0].InnerText.Trim(),
                    Sort = parentSort,
                    Remark = season.Remark,
                    Season = season.Season,
                    // "-1" marks a parent group that has no child-group block.
                    Extended1 = childDiv == null ? "-1" : string.Empty,
                }));

                var childLinks = childDiv == null ? null : childDiv.SelectNodes(".//a");
                if (childLinks == null)
                {
                    continue;
                }

                var childSort = 0;
                foreach (var link in childLinks)
                {
                    childSort++;
                    rows.Add(GetModel(new B_Grouping
                    {
                        Name = link.InnerText.Trim(),
                        Sort = childSort,
                        ParentId = parentId,
                        Remark = season.Remark,
                        Season = season.Season,
                        Extended1 = link.Attributes["attr"].Value
                    }));
                }
            }

            return rows;
        }

        /// <summary>
        /// Works out which scraped rows are missing from the stored rows.
        /// </summary>
        /// <param name="scraped">Rows produced by the crawl.</param>
        /// <param name="stored">Rows already in the database for the event.</param>
        /// <returns>The rows to insert; scraped child rows attached to a stored parent get that parent's id.</returns>
        private static List<B_Grouping> SelectRowsToInsert(List<B_Grouping> scraped, List<B_Grouping> stored)
        {
            var data = new List<B_Grouping>();

            // New parent groups together with all of their child groups.
            // NOTE(review): parents are matched by name and event only, not by season - confirm intended.
            var newParents = scraped
                .Where(a => string.IsNullOrEmpty(a.ParentId))
                .Where(a => !stored.Any(b => string.IsNullOrEmpty(b.ParentId)
                                             && a.EventId == b.EventId
                                             && b.Name.Trim() == a.Name.Trim()))
                .ToList();
            foreach (var parent in newParents)
            {
                data.Add(parent);
                data.AddRange(scraped.Where(c => c.ParentId == parent.Id));
            }

            // New child groups under parents that are already stored.
            foreach (var storedParent in stored.Where(p => string.IsNullOrEmpty(p.ParentId)).ToList())
            {
                // Child rows of this parent in the database. ParentId may be null on
                // top-level rows, so it is normalised before trimming.
                var storedChildren = stored
                    .Where(s => (s.ParentId ?? string.Empty).Trim() == storedParent.Id
                                && s.EventId == storedParent.EventId)
                    .ToList();

                // The scraped parent with the same name (first match wins).
                var scrapedParent = scraped.FirstOrDefault(s => s.Name.Trim() == storedParent.Name.Trim()
                                                                && s.EventId == storedParent.EventId
                                                                && string.IsNullOrEmpty(s.ParentId));
                if (scrapedParent == null)
                {
                    continue;
                }

                var scrapedChildren = scraped.Where(s => s.ParentId == scrapedParent.Id).ToList();
                if (scrapedChildren.Count == 0 || storedChildren.Count == scrapedChildren.Count)
                {
                    continue;
                }

                var storedNames = new HashSet<string>(storedChildren.Select(b => b.Name.Trim()));
                foreach (var child in scrapedChildren.Where(a => !storedNames.Contains(a.Name.Trim())).ToList())
                {
                    // Re-point the scraped child at the parent row that already exists.
                    child.ParentId = storedParent.Id;
                    data.Add(child);
                }
            }

            return data;
        }

        /// <summary>
        /// Fills in the common fields of a row: a new id when none is set,
        /// the creation time and the current event id.
        /// </summary>
        /// <param name="model">Row to complete; modified in place.</param>
        /// <returns>The same instance that was passed in.</returns>
        private B_Grouping GetModel(B_Grouping model)
        {
            if (model.Id.IsEmpty())
                model.Id = CommonHelper.GetGuid().ToString();
            model.CreateDateTime = DateTime.Now;
            model.EventId = eventId;
            return model;
        }

        /// <summary>
        /// Gets the NBA seasons that need to be crawled.
        /// </summary>
        /// <param name="doc">Root node of the main grouping page.</param>
        /// <returns>
        /// One descriptor (Remark = page URL, Season = option value) per season option that is
        /// not stored yet or whose value contains the current four-digit year.
        /// </returns>
        private IEnumerable<B_Grouping> GetNBASeason(HtmlNode doc)
        {
            var storedSeasons = services.Query<B_Grouping>(" AND EventId='{0}'".FormatMe(eventId))
                .GroupBy(a => a.Season)
                .Select(g => g.Key)
                .ToList();

            // HtmlAgilityPack returns null when the select or its options are missing.
            var selectNode = doc.SelectSingleNode(".//select[@id='selectSeason']");
            var options = selectNode == null ? null : selectNode.SelectNodes(".//option");
            if (options == null)
            {
                yield break;
            }

            // NOTE(review): the main URL uses two-digit years ("yy-yy"); if the option values
            // use the same format this four-digit check never matches - confirm against the page.
            var currentYear = DateTime.Now.Year.ToString();
            foreach (var option in options)
            {
                var value = option.Attributes["value"].Value;
                if (storedSeasons.Contains(value) && !value.Contains(currentYear))
                {
                    continue;
                }

                yield return new B_Grouping
                {
                    Remark = "http://lanqiu.zgzcw.com/1/" + value + "/saiAll.do",
                    Season = value
                };
            }
        }
    }
}
|