// Commons:Photo challenge/code/CreateVoting.cs
//Create voting code
//Author user:colin
//License: Public domain
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Net.Http;
using System.Text;
using System.Threading.Tasks;
using HtmlAgilityPack;
namespace CreateVoting
{
internal class Program
{
/// <summary>
/// Generates the voting page wikitext for one photo challenge.
/// Downloads the raw wikitext of the challenge page, looks up every file listed in its submission
/// gallery, and writes the voting page to "challenge.txt". Problems found along the way and the
/// per-user statistics are written to "Errors-challenge.txt".
/// </summary>
/// <param name="args">
/// Optional: <c>args[0]</c> is the challenge name in the form "year - month - theme".
/// When absent, the built-in default challenge is used.
/// </param>
private static async Task Main(string[] args)
{
    // The challenge name carries an English month name and the generated wikitext uses English dates,
    // so date parsing/formatting must not follow the culture of the machine running the tool.
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
    System.Globalization.CultureInfo.DefaultThreadCurrentCulture = invariant;
    System.Threading.Thread.CurrentThread.CurrentCulture = invariant;

    const string aChallenge = "2023 - December - In a box";
    const int maxEntriesPerUser = 4; // entries above this count are reported in the error file
    string challenge = args.Length > 0 ? args[0] : aChallenge;
    DateTime? minCreateDate = null; // new DateTime(2015, 9, 1); // Change for theme?
    bool draft = false;
    bool panorama = false; // true;
    string outFile = challenge + ".txt";

    // Will need to work on this for multi-month challenges
    string theme = challenge;
    DateTime minUploadDate;
    DateTime maxUploadDate;
    string[] parts = challenge.Split(new[] {" - "}, StringSplitOptions.RemoveEmptyEntries);
    if (parts.Length == 3)
    {
        theme = parts[2];
        string minUploadText = "1 " + parts[1] + " " + parts[0];
        if (DateTime.TryParse(minUploadText, invariant, System.Globalization.DateTimeStyles.None,
            out minUploadDate))
        {
            // Uploads are accepted until half a day past the start of the following month.
            maxUploadDate = minUploadDate.AddMonths(1).AddDays(0.5);
        }
        else
        {
            // Name did not yield a date: fall back to a fixed window.
            minUploadDate = new DateTime(2016, 3, 1);
            maxUploadDate = new DateTime(2020, 5, 1);
        }
    }
    else
    {
        minUploadDate = new DateTime(2020, 5, 1);
        maxUploadDate = new DateTime(2020, 9, 1).AddDays(0.5);
    }

    DateTime closeTime = maxUploadDate.AddDays(-1);
    DateTime voteCloseTime = maxUploadDate.AddMonths(1).AddDays(-1);
    string url = "https://commons.wikimedia.org/w/index.php?title=Commons:Photo_challenge/" +
                 challenge.Replace(' ', '_') + "&action=raw";

    using (var errorWriter = new StreamWriter("Errors-" + outFile, false, Encoding.UTF8))
    {
        List<string> wikiText = await DownloadWikiFile(url, errorWriter);
        if (wikiText == null)
        {
            errorWriter.WriteLine("No wikitext");
            return;
        }

        // Pass 1: collect details for every file listed in the submission gallery.
        var infos = new List<FileInfo>();
        bool foundGallery = false;
        foreach (string line in wikiText)
        {
            // Surrounding whitespace is not significant in a gallery entry; without trimming, an
            // indented entry would keep its "File:" prefix and produce a wrong URL.
            string entry = line.Trim();

            if (!foundGallery)
            {
                // The submission gallery is recognised by its 250px setting.
                if (entry.StartsWith("<gallery ", StringComparison.Ordinal) && entry.Contains("250px"))
                {
                    foundGallery = true;
                }
                continue;
            }

            if (entry.StartsWith("<!--", StringComparison.Ordinal))
            {
                continue;
            }
            if (entry.StartsWith("</gallery>", StringComparison.Ordinal))
            {
                break;
            }
            if (entry.Length == 0)
            {
                continue;
            }

            // Entry format: "File:Name.jpg|Caption"; the caption is optional.
            int bar = entry.IndexOf('|');
            string fileName = (bar == -1) ? entry : entry.Substring(0, bar);
            string title = (bar == -1) ? string.Empty : entry.Substring(bar + 1);
            // Ordinal comparison: ToLower() is culture-sensitive and breaks "FILE:" on e.g. Turkish locales.
            if (fileName.StartsWith("file:", StringComparison.OrdinalIgnoreCase))
            {
                fileName = fileName.Substring(5);
            }
            fileName = fileName.Replace('_', ' ').Trim();
            if (string.IsNullOrWhiteSpace(title))
            {
                // No caption given: use the file name without its extension.
                int dot = fileName.LastIndexOf('.');
                title = (dot == -1) ? fileName : fileName.Substring(0, dot);
            }
            if (fileName == "W2321-ToInsertYourPicToChallengeClickBelow.svg")
            {
                // Placeholder image of the submission gallery, not an entry.
                continue;
            }

            FileInfo info = await GetFileInfo(fileName, title, errorWriter);
            if (info == null)
            {
                errorWriter.WriteLine("BAD: " + fileName);
            }
            else
            {
                Console.WriteLine(info.FileName);
                infos.Add(info);
            }
        }

        // Pass 2: write the voting page.
        using (var writer = new StreamWriter(outFile, false, Encoding.UTF8))
        {
            writer.WriteLine("__NOTOC__");
            if (draft)
            {
                writer.WriteLine("{{Discussion top|THIS IS A DRAFT. DO NOT VOTE YET!}}");
            }
            writer.WriteLine("");
            writer.WriteLine(
                "'''Voting will end at midnight UTC on {0:dd MMMM yyyy}'''. The theme was '''{1}'''.",
                voteCloseTime, theme);
            writer.WriteLine("");
            writer.WriteLine(
                "{{Commons:Photo challenge/Voting header/{{SuperFallback|Commons:Photo challenge/Voting header}}}}");
            writer.WriteLine("");
            writer.WriteLine("===Sample===");
            writer.WriteLine(
                "[[File:Sample-image.svg|none|thumb|300x300px|Sample caption. [{{filepath:Sample-image.svg}}<br>''(Full size image)'']]]");
            writer.WriteLine(
                "*{{3/3*}} Great font! -- [[User:Colin|Colin]] ([[User talk:Colin|<span class=\"signature-talk\">talk</span>]]) 18:22, 2 January 2014 (UTC)");
            writer.WriteLine("*{{2/3*}} -- '''Another person''' 18:20, 2 January 2014 (UTC)");
            writer.WriteLine("*{{1/3*}} Cool. Love it. -- '''Yet another person''' 18:32, 2 January 2014 (UTC)");
            writer.WriteLine("*{{0/3*}} Beautiful -- '''Someone else''' 13:30 3 January 2014 (UTC)");
            writer.WriteLine("");

            int i = 1;
            foreach (FileInfo file in infos.OrderBy(im => im.Uploaded))
            {
                // Each rejected entry is logged and leaves an HTML comment in the page; only accepted
                // entries consume a number.
                if (minCreateDate.HasValue && (!file.Created.HasValue || file.Created < minCreateDate))
                {
                    errorWriter.WriteLine("{0} created early by user {1}", file.Url, file.Creator);
                    writer.WriteLine(
                        "<!-- REMOVED: {0} by [[User:{1}|{1}]] was taken {2:dd MMMM yyyy} too early ({3:dd MMMM yyyy}) -->",
                        file.FileName, file.Creator, file.Created, minCreateDate);
                    writer.WriteLine("");
                    continue;
                }

                double ratio = (((double) file.Width)/file.Height);
                if (panorama && !(ratio <= 0.5 || ratio >= 2.0))
                {
                    errorWriter.WriteLine("{0} wrong ratio {1} user {2}", file.Url, file.AspectRatioText, file.Creator);
                    // The original message had an unbalanced ")" after the ratio.
                    writer.WriteLine(
                        "<!-- REMOVED: {0} by [[User:{1}|{1}]] wrong ratio {2} -->",
                        file.FileName, file.Creator, file.AspectRatioText);
                    writer.WriteLine("");
                    continue;
                }

                if (file.Uploaded < minUploadDate)
                {
                    errorWriter.WriteLine("{0} uploaded early by user {1}", file.Url, file.Creator);
                    writer.WriteLine(
                        "<!-- REMOVED: {0} by [[User:{1}|{1}]] was uploaded {2:dd MMMM yyyy} before the challenge opened ({3:dd MMMM yyyy}) -->",
                        file.FileName, file.Creator, file.Uploaded, minUploadDate);
                    writer.WriteLine("");
                    continue;
                }

                if (file.Uploaded >= maxUploadDate)
                {
                    errorWriter.WriteLine("{0} uploaded late by user {1}", file.Url, file.Creator);
                    writer.WriteLine(
                        "<!-- REMOVED: {0} by [[User:{1}|{1}]] was uploaded {2:dd MMMM yyyy} after the challenge closed ({3:dd MMMM yyyy}) -->",
                        file.FileName, file.Creator, file.Uploaded, closeTime);
                    writer.WriteLine("");
                    continue;
                }

                writer.WriteLine("==={0}. {1}===", i, file.FileWithoutExt);
                writer.WriteLine(
                    "[[File:{0}|none|thumb|{1}px|{2} [{{{{filepath:{0}}}}}<br>''(Full size image)'']]]",
                    file.FileName, file.WidthForSize(240000), file.Title);
                writer.WriteLine(
                    "<!-- '''Creator:''' [[User:{0}|{0}]] --> {1}'''Uploaded:''' {2:dd MMMM yyyy} '''Size''': {3} × {4} ({5:F1} MP){6}{{{{Collapse top|Current votes – please choose your own winners before looking}}}}",
                    file.Creator,
                    file.Created.HasValue
                        ? string.Format(invariant, "'''Taken:''' {0:dd MMMM yyyy} ", file.Created)
                        : string.Empty,
                    file.Uploaded,
                    file.Width,
                    file.Height,
                    file.Megapixels,
                    panorama ? string.Format(" '''Aspect ratio:''' {0} ", file.AspectRatioText) : string.Empty);
                writer.WriteLine("<!-- Vote below this line -->");
                writer.WriteLine("<!-- Vote above this line -->");
                writer.WriteLine("{{Collapse bottom}}");
                writer.WriteLine("");
                ++i;
            }

            // Per-user statistics for the error file. Grouped once and materialised, so the query is
            // not re-run for every use and the file list is not re-scanned per user.
            var users = infos
                .GroupBy(photo => photo.Creator)
                .Select(grp => new { Name = grp.Key, Files = grp.ToList() })
                .OrderByDescending(u => u.Files.Count)
                .ToList();
            int nbrEntriesIfCapped = users.Sum(u => Math.Min(maxEntriesPerUser, u.Files.Count));

            errorWriter.WriteLine(users.Count + " users");
            foreach (var user in users)
            {
                errorWriter.WriteLine(user.Name + " " + user.Files.Count);
                if (user.Files.Count > maxEntriesPerUser)
                {
                    // List the over-limit user's files, newest upload first, as a pasteable gallery.
                    errorWriter.WriteLine(" (<gallery>");
                    foreach (FileInfo extra in user.Files.OrderByDescending(x => x.Uploaded))
                    {
                        errorWriter.WriteLine(extra.FileName);
                    }
                    errorWriter.WriteLine("</gallery>)");
                }
            }

            int nbrTakenDuringChallenge =
                infos.Count(photo => photo.Created >= minUploadDate && photo.Created <= maxUploadDate);
            errorWriter.WriteLine(infos.Count + " photos and " + nbrTakenDuringChallenge + " taken during challenge. " + nbrEntriesIfCapped + " if capped");

            foreach (var user in users.Where(u => u.Files.Count > maxEntriesPerUser))
            {
                string files = string.Join(", ",
                    user.Files.Select(photo => string.Format("[[:File:{0}]]", photo.FileName)));
                errorWriter.WriteLine(user.Name + " uploaded " + user.Files.Count + " images: " + files);
            }

            if (draft)
            {
                writer.WriteLine("{{Discussion bottom}}");
            }
        }
    }

    Console.WriteLine("Press a key");
    Console.ReadKey();
}
// One client for all file-page requests: creating a new HttpClient per file (never disposed) can
// exhaust sockets when a challenge has many entries.
private static readonly HttpClient FilePageClient = new HttpClient();

/// <summary>
/// Downloads the Commons description page of a file and scrapes details from its HTML: upload time
/// and uploader (both taken from the last row of the file history table), pixel dimensions and file
/// size (JPEG only), creation date, and whether an "own work" marker is present.
/// </summary>
/// <param name="fileName">File name without the "File:" prefix.</param>
/// <param name="title">Caption, stored unchanged in the result.</param>
/// <param name="errorWriter">Receives one line for each detail that is missing or unreadable.</param>
/// <returns>
/// The collected details (individual values may be unset), or <c>null</c> when the page could not
/// be downloaded.
/// </returns>
private static async Task<FileInfo> GetFileInfo(string fileName, string title, StreamWriter errorWriter)
{
    // The page is requested in its default (English) rendering, so dates and numbers are parsed
    // with the invariant culture rather than the culture of the machine running the tool.
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
    const System.Globalization.DateTimeStyles dateStyle = System.Globalization.DateTimeStyles.AllowWhiteSpaces;

    var info = new FileInfo {FileName = fileName, Title = title};
    HttpResponseMessage response;
    try
    {
        response = await FilePageClient.GetAsync(info.Url);
        response.EnsureSuccessStatusCode();
    }
    catch (Exception e)
    {
        errorWriter.WriteLine(info.Url + " gave " + e.Message);
        return null;
    }

    using (response)
    using (Stream receiveStream = await response.Content.ReadAsStreamAsync())
    {
        var doc = new HtmlDocument();
        doc.Load(receiveStream, Encoding.UTF8);
        HtmlNode docNode = doc.DocumentNode;

        // Upload time: text of the last matching link in the file history table.
        HtmlNodeCollection uploadedNodes =
            docNode.SelectNodes(
                "//table[@class='wikitable filehistory']/tr/td[contains(@style, 'white-space: nowrap;')]/a[contains(@href, 'upload.wikimedia.org')]");
        if (uploadedNodes != null)
        {
            HtmlNode firstupload = uploadedNodes.Last();
            // 12:53, 8 March 2014
            string date = firstupload.InnerText;
            int comma = date.IndexOf(',');
            if (comma != -1)
            {
                // Reorder to "8 March 2014 12:53" before parsing.
                date = date.Substring(comma + 1) + " " + date.Substring(0, comma);
                DateTime uploaded;
                if (DateTime.TryParse(date, invariant, dateStyle, out uploaded))
                {
                    info.Uploaded = uploaded;
                }
            }
        }
        if (!info.Uploaded.HasValue)
        {
            errorWriter.WriteLine(info.Url + " had no uploaded date time.");
        }

        // Uploader: last user link in the file history table.
        ////HtmlNodeCollection uploadNodes = docNode.SelectNodes("//table[@class='wikitable filehistory']/tr/td/a[contains(@href, '/wiki/User:') or contains(@href, '/w/index.php?title=User:')]");
        HtmlNodeCollection uploadNodes =
            docNode.SelectNodes(
                "//table[@class='wikitable filehistory']/tr/td/a[contains(@class, 'mw-userlink')]");
        if (uploadNodes != null)
        {
            HtmlNode firstContrib = uploadNodes.Last();
            info.Creator = firstContrib.InnerText;
        }
        if (string.IsNullOrEmpty(info.Creator))
        {
            errorWriter.WriteLine(info.Url + " had no creator.");
        }

        // Dimensions and file size; only read for JPEG files, everything else keeps 0.
        HtmlNode finfoNode = docNode.SelectNodes("//span[@class='fileInfo']")?.FirstOrDefault();
        if (finfoNode != null)
        {
            //(900 × 657 pixels, file size: 117 KB, MIME type: image/jpeg)
            string inner = finfoNode.InnerText;
            if (inner.Contains("image/jpeg"))
            {
                // Skip the leading "(" and split into words: [0]=width, [2]=height, [6]=size, [7]=unit.
                string[] words = inner.Substring(1).Split(new[] { ' ' });
                int width;
                int height;
                double size;
                // TryParse plus a length check: an unexpected layout is reported as "no file info"
                // below instead of aborting the whole run with an exception.
                if (words.Length > 7
                    && int.TryParse(words[0].Replace(",", ""), System.Globalization.NumberStyles.Integer, invariant, out width)
                    && int.TryParse(words[2].Replace(",", ""), System.Globalization.NumberStyles.Integer, invariant, out height)
                    && double.TryParse(words[6].Replace(",", ""), System.Globalization.NumberStyles.Float, invariant, out size))
                {
                    info.Width = width;
                    info.Height = height;
                    // A size given in KB is converted to MB; any other unit is taken as MB.
                    info.FileSizeMB = words[7].StartsWith("KB", StringComparison.Ordinal) ? size / 1024.0 : size;
                }
            }
        }
        if (info.Width == 0)
        {
            errorWriter.WriteLine(info.Url + " had no file info.");
        }

        // Creation date: prefer the machine-readable <time class="dtstart">, otherwise the EXIF row.
        // <time class="dtstart" datetime="2015-04-12 09:17:25">12 April 2015, 09:17:25</time>
        // <tr class="exif-datetimeoriginal">
        HtmlNodeCollection startNodes = docNode.SelectNodes("//time[@class='dtstart']");
        // Some images have object date before photo date
        HtmlNode startNode = startNodes?.LastOrDefault();
        DateTime created;
        if (startNode != null)
        {
            string timeStamp = startNode.GetAttributeValue("datetime", string.Empty);
            if (DateTime.TryParse(timeStamp, invariant, dateStyle, out created))
            {
                info.Created = created;
            }
        }
        else
        {
            startNode = docNode.SelectNodes("//tr[@class='exif-datetimeoriginal']/td")?.FirstOrDefault();
            if (startNode != null && DateTime.TryParse(startNode.InnerText, invariant, dateStyle, out created))
            {
                info.Created = created;
            }
        }
        // A missing creation date is deliberately not reported.

        var ownWorkNodes = docNode.SelectNodes("//span[@class='int-own-work']");
        info.OwnWork = (ownWorkNodes != null);
        if (!info.OwnWork)
        {
            errorWriter.WriteLine(info.Url + " may not be own work.");
        }
    }
    return info;
}
/// <summary>
/// Downloads a text resource and returns it split into lines.
/// </summary>
/// <param name="url">Address to fetch.</param>
/// <param name="errorWriter">Receives a line describing the failure when the download is unusable.</param>
/// <returns>
/// The lines of the response body, or <c>null</c> when the request failed, the status was not
/// 200 OK, or the content type was missing or not "text/...".
/// </returns>
private static async Task<List<string>> DownloadWikiFile(string url, StreamWriter errorWriter)
{
    using (var client = new HttpClient())
    {
        HttpResponseMessage response;
        try
        {
            response = await client.GetAsync(url);
            response.EnsureSuccessStatusCode();
        }
        catch (Exception e)
        {
            errorWriter.WriteLine(url + " gave " + e.Message);
            return null;
        }

        using (response)
        {
            // EnsureSuccessStatusCode has already thrown for every non-2xx status, so redirect
            // codes cannot reach this point; only OK needs checking.
            // The Content-Type header is optional, hence the null-safe access.
            string mediaType = response.Content?.Headers.ContentType?.MediaType;
            bool isText = mediaType != null &&
                          mediaType.StartsWith("text", StringComparison.OrdinalIgnoreCase);
            if (response.StatusCode != HttpStatusCode.OK || !isText)
            {
                errorWriter.WriteLine(url + " gave " + response.StatusCode);
                return null;
            }

            var result = new List<string>();
            using (Stream inputStream = await response.Content.ReadAsStreamAsync())
            using (var reader = new StreamReader(inputStream))
            {
                string line;
                while ((line = await reader.ReadLineAsync()) != null)
                {
                    result.Add(line);
                }
            }
            return result;
        }
    }
}
/// <summary>
/// Details of one challenge entry, plus values derived from them for the voting page.
/// </summary>
public class FileInfo
{
    /// <summary>File name without the "File:" prefix.</summary>
    public string FileName { get; set; }

    /// <summary>Caption shown under the thumbnail.</summary>
    public string Title { get; set; }

    /// <summary>User name credited for the entry.</summary>
    public string Creator { get; set; }

    /// <summary>When the photo was taken, if known.</summary>
    public DateTime? Created { get; set; }

    /// <summary>When the file was uploaded, if known.</summary>
    public DateTime? Uploaded { get; set; }

    /// <summary>Width in pixels; 0 when unknown.</summary>
    public int Width { get; set; }

    /// <summary>Height in pixels; 0 when unknown.</summary>
    public int Height { get; set; }

    /// <summary>Whether the file page carried an "own work" marker.</summary>
    public bool OwnWork { get; set; }

    /// <summary>File size in megabytes.</summary>
    public double FileSizeMB { get; set; }

    /// <summary>
    /// Width at which the image, scaled with its aspect ratio kept, covers about
    /// <paramref name="sizePx"/> pixels in total.
    /// </summary>
    /// <param name="sizePx">Target area in pixels.</param>
    /// <returns>The scaled width truncated to whole pixels, or 0 when the dimensions are unknown.</returns>
    public int WidthForSize(int sizePx)
    {
        if (Width <= 0 || Height <= 0)
        {
            // Avoids 0 * infinity = NaN, whose conversion to int is unspecified.
            return 0;
        }
        // Multiply as double: Width * Height as int overflows for very large images.
        double pixels = (double)Width * Height;
        return (int)(Width * Math.Sqrt(sizePx / pixels));
    }

    /// <summary>Pixel count in megapixels, rounded down to one decimal, e.g. "0.5".</summary>
    public string Megapixels
    {
        get
        {
            // Multiply as double: Width * Height as int overflows above ~2147 megapixels.
            double mp = ((double)Width * Height) / 1000.0 / 1000.0;
            double mpRound = Math.Floor(mp * 10.0) / 10.0;
            // Invariant culture keeps the decimal point in the wikitext on every machine.
            return mpRound.ToString("F1", System.Globalization.CultureInfo.InvariantCulture);
        }
    }

    /// <summary>File name with its last extension removed.</summary>
    public string FileWithoutExt
    {
        get
        {
            int dot = FileName.LastIndexOf('.');
            return (dot == -1) ? FileName : FileName.Substring(0, dot);
        }
    }

    /// <summary>Longer side divided by shorter side, rounded down to one decimal.</summary>
    public double AspectRatio
    {
        get
        {
            double ratio = Width > Height ? ((double)Width / Height) : ((double)Height / Width);
            return Math.Floor(ratio * 10.0) / 10.0;
        }
    }

    /// <summary>Aspect ratio as "r:1" when wider than tall, otherwise "1:r".</summary>
    public string AspectRatioText
    {
        get
        {
            string ratio = AspectRatio.ToString("F1", System.Globalization.CultureInfo.InvariantCulture);
            return Width > Height ? (ratio + ":1") : ("1:" + ratio);
        }
    }

    /// <summary>Address of the file's description page on Commons.</summary>
    public string Url
    {
        get
        {
            // Spaces become underscores; "?" is escaped so it does not start a query string.
            return "https://commons.wikimedia.org/wiki/File:" + FileName.Replace(' ', '_').Replace("?", "%3F");
        }
    }
}
}
}