123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186 |
- using FCS.Common;
- using FCS.Interface;
- using FCS.Models;
- using FCS.Models.DTO;
- using FCS.Models.Entity;
- using HtmlAgilityPack;
- using Quartz;
- using System;
- using System.Collections.Generic;
- using System.Data;
- using System.Diagnostics;
- using System.Linq;
- using System.Text;
- using System.Threading;
- using System.Threading.Tasks;
- namespace FCS.Crawler.Basketball
- {
/// <summary>
/// Basketball team crawl job.
/// Reads the grouping rows that carry a source URL, derives the three
/// "teamStatistics" pages for each of them, collects the team links found on
/// those pages, fetches every team detail page and bulk-inserts the teams whose
/// name is not stored yet.
/// </summary>
/// <remarks>
/// The crawled teams are buffered in a static list, and <see cref="GetAll"/> resets
/// that buffer when it starts, so overlapping executions of this job would
/// interfere with each other.
/// </remarks>
public class B_TeamJob : CommonJob, IJob
{
    /// <summary>
    /// Path prefixes substituted into the grouping URL to obtain the three
    /// team-statistics pages ("1/teamStatistics.do" .. "3/teamStatistics.do").
    /// </summary>
    private static readonly string[] StatisticsPagePrefixes = { "1", "2", "3" };

    /// <summary>
    /// Guards <see cref="TeamList"/>; List&lt;T&gt; is not safe for concurrent writers
    /// and <see cref="GetTeamHtml"/> runs on many thread-pool threads at once.
    /// </summary>
    private static readonly object TeamListLock = new object();

    /// <summary>
    /// Teams collected by <see cref="GetTeamHtml"/> during the current run.
    /// Access only while holding <see cref="TeamListLock"/>.
    /// </summary>
    private static List<B_Team> TeamList = new List<B_Team>();

    /// <summary>
    /// Creates the job and resolves the logging and data-access services.
    /// </summary>
    public B_TeamJob()
    {
        log = new LogHelper();
        services = IOC.Resolve<IDTOpenCode>();
    }

    /// <summary>
    /// Quartz entry point: loads the job configuration from the data map and runs the crawl.
    /// </summary>
    /// <param name="context">Execution context supplied by the scheduler.</param>
    public void Execute(IJobExecutionContext context)
    {
        Config = CommonHelper.GetConfigFromDataMap(context.JobDetail.JobDataMap);
        GetAll();
    }

    /// <summary>
    /// Crawls all team pages reachable from the stored groupings and inserts the
    /// teams whose name is not stored yet.
    /// </summary>
    /// <remarks>
    /// The crawl runs in two stages, each awaited explicitly with Task.WaitAll:
    /// first every statistics page is scanned for team links, then every team
    /// detail page is fetched. A failure on a single page is traced and skipped
    /// so that it cannot abort the rest of the crawl.
    /// </remarks>
    public void GetAll()
    {
        ThreadPool.SetMinThreads(10, 10);
        ThreadPool.SetMaxThreads(200, 200);

        var groupings = services.Query<B_Grouping>("and Remark is not null", "", "distinct EventId,Season,Remark").ToList();
        var existingTeams = services.Query<B_Team>().ToList();

        // The buffer is static; drop whatever an earlier run left behind.
        lock (TeamListLock)
        {
            TeamList.Clear();
        }

        // Stage 1: scan each statistics page for team links.
        var pageTasks = new List<Task<List<TeamLink>>>();
        foreach (var grouping in groupings)
        {
            string remark = grouping.Remark.ToString();
            string eventId = grouping.EventId;
            foreach (var prefix in StatisticsPagePrefixes)
            {
                string pageUrl = remark.Replace("saiAll.do", prefix + "/teamStatistics.do");
                pageTasks.Add(Task.Run(() => CollectTeamLinks(pageUrl, eventId)));
            }
        }
        Task.WaitAll(pageTasks.ToArray());

        // Stage 2: fetch each team detail page.
        var teamTasks = new List<Task>();
        foreach (var pageTask in pageTasks)
        {
            // Every stage-1 task has completed and never faults, so Result does not block or throw.
            foreach (var link in pageTask.Result)
            {
                // Copy to a per-iteration local so the closure is correct under any compiler version.
                TeamLink target = link;
                teamTasks.Add(Task.Run(() => FetchTeamSafely(target)));
            }
        }
        Task.WaitAll(teamTasks.ToArray());
        Console.WriteLine("Thread Finished!");

        List<B_Team> crawledTeams;
        lock (TeamListLock)
        {
            crawledTeams = new List<B_Team>(TeamList);
        }

        // Keep only the first crawled team per name that is not stored yet.
        // NOTE(review): uniqueness is by Name alone, as before; EventId is not part of the key.
        var knownNames = new HashSet<string>(existingTeams.Select(t => t.Name));
        var addList = new List<B_Team>();
        foreach (var team in crawledTeams)
        {
            if (knownNames.Add(team.Name))
            {
                addList.Add(team);
            }
        }
        services.SqlBulkCopyAdd<B_Team>(addList);
    }

    /// <summary>
    /// Fetches one team detail page, builds a <c>B_Team</c> from it and appends it to the
    /// crawl buffer. When the page is unavailable or reports that the team has no data,
    /// a placeholder holding only id, name and event id is buffered instead.
    /// </summary>
    /// <param name="url">URL of the team detail page.</param>
    /// <param name="TeamName">Team name taken from the statistics page link.</param>
    /// <param name="eventId">Id of the event the team was found under.</param>
    /// <returns>
    /// <c>true</c> when the detail page was parsed; <c>false</c> when only a placeholder
    /// could be buffered.
    /// </returns>
    public bool GetTeamHtml(string url, string TeamName, string eventId)
    {
        HtmlDocument doc = CommonHelper.GetHtmlHtmlDocument(new HtmlParameterDTO
        {
            Url = url,
            Title = "球队详情"
        });

        B_Team team = new B_Team();
        team.Id = Guid.NewGuid().ToString();
        team.Name = TeamName;
        team.EventId = eventId;

        if (doc == null || doc.DocumentNode.InnerText.Contains("球队还没有资料"))
        {
            AddTeam(team);
            return false;
        }

        team.Remark = url;
        team.Describe = "";
        team.LogoImage = ExtractLogoUrl(doc);

        // Basic team facts are rendered as a flat sequence of <em> elements.
        var teamData = doc.DocumentNode.SelectNodes("//*[@class='div_qdxq']/span/em");
        if (teamData != null)
        {
            var facts = teamData.ToList();
            team.City = FactText(facts, 7);
            team.Year = FactText(facts, 13);
            team.Venues = FactText(facts, 15);
            team.Coach = FactText(facts, 11);
        }

        AddTeam(team);
        return true;
    }

    /// <summary>
    /// Downloads one statistics page and returns the team links found in its ranking table.
    /// Never throws: a failure is traced and the links gathered so far are returned.
    /// </summary>
    private static List<TeamLink> CollectTeamLinks(string pageUrl, string eventId)
    {
        var links = new List<TeamLink>();
        try
        {
            HtmlDocument doc = CommonHelper.GetHtml(pageUrl);
            if (doc == null)
            {
                return links;
            }

            // SelectNodes returns null (not an empty collection) when nothing matches.
            var anchors = doc.DocumentNode.SelectNodes("//*[@class='paiming']/table/tr/td/a");
            if (anchors == null)
            {
                return links;
            }

            foreach (var anchor in anchors)
            {
                var href = anchor.Attributes.FirstOrDefault(a => a.Name.Equals("href"));
                if (href == null)
                {
                    // An anchor without a target cannot be crawled; skip it instead of failing the page.
                    continue;
                }

                links.Add(new TeamLink
                {
                    Url = href.Value,
                    Name = anchor.InnerText.Trim().Replace("\n", "").Replace("\r", "").Replace("\t", ""),
                    EventId = eventId
                });
            }
        }
        catch (Exception ex)
        {
            Trace.WriteLine("B_TeamJob: failed to read team list from " + pageUrl + ": " + ex);
        }
        return links;
    }

    /// <summary>
    /// Runs <see cref="GetTeamHtml"/> for one link and traces any exception instead of
    /// letting it fault the task.
    /// </summary>
    private void FetchTeamSafely(TeamLink link)
    {
        try
        {
            GetTeamHtml(link.Url, link.Name, link.EventId);
        }
        catch (Exception ex)
        {
            Trace.WriteLine("B_TeamJob: failed to read team page " + link.Url + ": " + ex);
        }
    }

    /// <summary>
    /// Returns the <c>src</c> of the first team image on the page, or an empty string
    /// when the page has no such image.
    /// </summary>
    private static string ExtractLogoUrl(HtmlDocument doc)
    {
        var images = doc.DocumentNode.SelectNodes("//*[@class='xq_img']/img");
        if (images == null)
        {
            return "";
        }

        var first = images.FirstOrDefault();
        if (first == null)
        {
            return "";
        }

        var src = first.Attributes.FirstOrDefault(a => a.Name.Equals("src"));
        return src == null ? "" : src.Value;
    }

    /// <summary>
    /// Returns the text of the fact element at <paramref name="index"/>, or an empty
    /// string when the list does not extend at least one element beyond that index.
    /// </summary>
    private static string FactText(List<HtmlNode> facts, int index)
    {
        // The "one element beyond" requirement is carried over unchanged from the
        // original bounds checks (Count > 8 for index 7, and so on).
        return facts.Count > index + 1 ? facts[index].InnerText : "";
    }

    /// <summary>
    /// Appends a team to the shared crawl buffer under the buffer lock.
    /// </summary>
    private static void AddTeam(B_Team team)
    {
        lock (TeamListLock)
        {
            TeamList.Add(team);
        }
    }

    /// <summary>
    /// A team link discovered on a statistics page.
    /// </summary>
    private sealed class TeamLink
    {
        /// <summary>Target of the link (team detail page).</summary>
        public string Url { get; set; }

        /// <summary>Link text with surrounding whitespace and line breaks/tabs removed.</summary>
        public string Name { get; set; }

        /// <summary>Event id of the grouping whose page contained the link.</summary>
        public string EventId { get; set; }
    }
}
- }
|