// Commons:Photo challenge/code/CreateVoting.cs
//
// From Wikimedia Commons, the free media repository.
//Create voting code
//Author user:colin
//License: Public domain
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Net;
using System.Net.Http;
using System.Text;
using System.Threading.Tasks;
using HtmlAgilityPack;

namespace CreateVoting
{
    internal class Program
    {
        /// <summary>
        /// Builds the voting page wikitext for one Photo challenge.
        /// Downloads the raw wikitext of the challenge sub-page, reads the entries of its
        /// 250px gallery, fetches the description page of every entry, and writes the voting
        /// page to "&lt;challenge&gt;.txt". Problems and per-user statistics are written to
        /// "Errors-&lt;challenge&gt;.txt".
        /// </summary>
        /// <param name="args">
        /// Optional. <c>args[0]</c> is the challenge sub-page name in the form
        /// "YYYY - Month - Theme"; when absent the built-in default is used.
        /// </param>
        private static async Task Main(string[] args)
        {
            const string aChallenge = "2023 - December - In a box";

            // Users with more entries than this are listed in the error log for manual review.
            const int maxEntriesPerUser = 4;

            // The month name in the challenge title is parsed as English and the generated
            // wikitext uses English month names, so the machine's regional settings must not
            // leak into parsing or formatting. The default is set as well so that thread-pool
            // threads running async continuations behave the same way.
            CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
            System.Threading.Thread.CurrentThread.CurrentCulture = CultureInfo.InvariantCulture;

            string challenge = args.Length > 0 ? args[0] : aChallenge;

            DateTime? minCreateDate = null; //  new DateTime(2015, 9, 1); // Change for theme? 
            bool draft = false;
            bool panorama = false; // true;

            string outFile = challenge + ".txt";

            // Will need to work on this for multi-month challenges
            string theme = challenge;
            DateTime minUploadDate;
            DateTime maxUploadDate;
            string[] parts = challenge.Split(new[] {" - "}, StringSplitOptions.RemoveEmptyEntries);
            if (parts.Length == 3)
            {
                theme = parts[2];
                string minUploadText = "1 " + parts[1] + " " + parts[0];

                if (DateTime.TryParse(minUploadText, CultureInfo.InvariantCulture, DateTimeStyles.None,
                    out minUploadDate))
                {
                    // Half a day of grace after the end of the month.
                    maxUploadDate = minUploadDate.AddMonths(1).AddDays(0.5);
                }
                else
                {
                    // Hard-coded fallback window used when the title's date cannot be parsed.
                    minUploadDate = new DateTime(2016, 3, 1);
                    maxUploadDate = new DateTime(2020, 5, 1);
                }
            }
            else
            {
                // Hard-coded fallback window used when the title is not "year - month - theme".
                minUploadDate = new DateTime(2020, 5, 1);
                maxUploadDate = new DateTime(2020, 9, 1).AddDays(0.5);
            }

            DateTime closeTime = maxUploadDate.AddDays(-1);
            DateTime voteCloseTime = maxUploadDate.AddMonths(1).AddDays(-1);

            // Escape the page name so characters such as '&', '?' or '#' in a theme cannot
            // break the query string.
            string url = "https://commons.wikimedia.org/w/index.php?title=Commons:Photo_challenge/" +
                         Uri.EscapeDataString(challenge.Replace(' ', '_')) + "&action=raw";

            using (var errorWriter = new StreamWriter("Errors-" + outFile, false, Encoding.UTF8))
            {
                List<string> wikiText = await DownloadWikiFile(url, errorWriter);

                if (wikiText == null)
                {
                    errorWriter.WriteLine("No wikitext");
                    return;
                }

                var infos = new List<FileInfo>();

                bool foundGallery = false;
                foreach (string line in wikiText)
                {
                    if (!foundGallery)
                    {
                        // The entries live in the first gallery tag that mentions 250px.
                        if (line.StartsWith("<gallery ", StringComparison.Ordinal) && line.Contains("250px"))
                        {
                            foundGallery = true;
                        }
                        continue;
                    }

                    if (line.StartsWith("<!--", StringComparison.Ordinal))
                    {
                        continue;
                    }
                    if (line.StartsWith("</gallery>", StringComparison.Ordinal))
                    {
                        break;
                    }

                    if (string.IsNullOrWhiteSpace(line))
                    {
                        continue;
                    }

                    // Gallery line format: "File:Name.jpg|optional caption".
                    int bar = line.IndexOf('|');
                    string fileName = ((bar == -1) ? line : line.Substring(0, bar)).Trim();
                    string title = ((bar == -1) ? string.Empty : line.Substring(bar + 1)).Trim();

                    // Ordinal comparison: ToLower() is culture-sensitive and mishandles 'I'
                    // under e.g. Turkish regional settings.
                    if (fileName.StartsWith("file:", StringComparison.OrdinalIgnoreCase))
                    {
                        fileName = fileName.Substring(5);
                    }
                    fileName = fileName.Replace('_', ' ').Trim();

                    if (string.IsNullOrWhiteSpace(title))
                    {
                        // No caption given: fall back to the file name without extension.
                        int dot = fileName.LastIndexOf('.');
                        title = (dot == -1) ? fileName : fileName.Substring(0, dot);
                    }

                    // Placeholder image that invites users to add their entry.
                    if (fileName == "W2321-ToInsertYourPicToChallengeClickBelow.svg")
                    {
                        continue;
                    }

                    FileInfo info = await GetFileInfo(fileName, title, errorWriter);

                    if (info == null)
                    {
                        errorWriter.WriteLine("BAD: " + fileName);
                    }
                    else
                    {
                        Console.WriteLine(info.FileName);
                        infos.Add(info);
                    }
                }

                using (var writer = new StreamWriter(outFile, false, Encoding.UTF8))
                {
                    writer.WriteLine("__NOTOC__");
                    if (draft)
                    {
                        writer.WriteLine("{{Discussion top|THIS IS A DRAFT. DO NOT VOTE YET!}}");
                    }
                    writer.WriteLine("");
                    writer.WriteLine(
                        "'''Voting will end at midnight UTC on {0:dd MMMM yyyy}'''. The theme was '''{1}'''.",
                        voteCloseTime, theme);
                    writer.WriteLine("");
                    writer.WriteLine(
                        "{{Commons:Photo challenge/Voting header/{{SuperFallback|Commons:Photo challenge/Voting header}}}}");
                    writer.WriteLine("");
                    writer.WriteLine("===Sample===");
                    writer.WriteLine(
                        "[[File:Sample-image.svg|none|thumb|300x300px|Sample caption. [{{filepath:Sample-image.svg}}<br>''(Full size image)'']]]");
                    writer.WriteLine(
                        "*{{3/3*}} Great font! -- [[User:Colin|Colin]] ([[User talk:Colin|<span class=\"signature-talk\">talk</span>]]) 18:22, 2 January 2014 (UTC)");
                    writer.WriteLine("*{{2/3*}} -- '''Another person''' 18:20, 2 January 2014 (UTC)");
                    writer.WriteLine("*{{1/3*}} Cool. Love it. -- '''Yet another person''' 18:32, 2 January 2014 (UTC)");
                    writer.WriteLine("*{{0/3*}} Beautiful -- '''Someone else''' 13:30 3 January 2014 (UTC)");
                    writer.WriteLine("");

                    // Running number of accepted entries; rejected entries do not consume a number.
                    int i = 1;

                    // Entries without a known upload date sort first and pass both upload-date
                    // checks below (comparisons against null are false).
                    foreach (FileInfo file in infos.OrderBy(im => im.Uploaded))
                    {
                        if (minCreateDate.HasValue && (!file.Created.HasValue || file.Created < minCreateDate))
                        {
                            errorWriter.WriteLine("{0} created early by user {1}", file.Url, file.Creator);
                            writer.WriteLine(
                                "<!-- REMOVED: {0} by [[User:{1}|{1}]] was taken {2:dd MMMM yyyy} too early ({3:dd MMMM yyyy}) -->",
                                file.FileName, file.Creator, file.Created, minCreateDate);
                            writer.WriteLine("");
                            continue;
                        }

                        // Panorama themes only accept images at least twice as long as wide
                        // (or the reverse).
                        double ratio = (((double) file.Width)/file.Height);
                        if (panorama && !(ratio <= 0.5 || ratio >= 2.0))
                        {
                            errorWriter.WriteLine("{0} wrong ratio {1} user {2}", file.Url, file.AspectRatioText, file.Creator);
                            writer.WriteLine(
                                "<!-- REMOVED: {0} by [[User:{1}|{1}]] wrong ratio {2}) -->",
                                file.FileName, file.Creator, file.AspectRatioText);
                            writer.WriteLine("");
                            continue;
                        }

                        if (file.Uploaded < minUploadDate)
                        {
                            errorWriter.WriteLine("{0} uploaded early by user {1}", file.Url, file.Creator);
                            writer.WriteLine(
                                "<!-- REMOVED: {0} by [[User:{1}|{1}]] was uploaded {2:dd MMMM yyyy} before the challenge opened ({3:dd MMMM yyyy}) -->",
                                file.FileName, file.Creator, file.Uploaded, minUploadDate);
                            writer.WriteLine("");
                            continue;
                        }

                        if (file.Uploaded >= maxUploadDate)
                        {
                            errorWriter.WriteLine("{0} uploaded late by user {1}", file.Url, file.Creator);
                            writer.WriteLine(
                                "<!-- REMOVED: {0} by [[User:{1}|{1}]] was uploaded {2:dd MMMM yyyy} after the challenge closed ({3:dd MMMM yyyy}) -->",
                                file.FileName, file.Creator, file.Uploaded, closeTime);
                            writer.WriteLine("");
                            continue;
                        }

                        writer.WriteLine("==={0}. {1}===", i, file.FileWithoutExt);
                        writer.WriteLine(
                            "[[File:{0}|none|thumb|{1}px|{2} [{{{{filepath:{0}}}}}<br>''(Full size image)'']]]",
                            file.FileName, file.WidthForSize(240000), file.Title);
                        writer.WriteLine(
                            "<!-- '''Creator:''' [[User:{0}|{0}]] --> {1}'''Uploaded:''' {2:dd MMMM yyyy} '''Size''': {3} × {4} ({5:F1} MP){6}{{{{Collapse top|Current votes – please choose your own winners before looking}}}}",
                            file.Creator,
                            file.Created.HasValue
                                ? string.Format("'''Taken:''' {0:dd MMMM yyyy} ", file.Created)
                                : string.Empty,
                            file.Uploaded,
                            file.Width,
                            file.Height,
                            file.Megapixels,
                            panorama ? string.Format(" '''Aspect ratio:''' {0} ", file.AspectRatioText) : string.Empty);
                        writer.WriteLine("<!-- Vote below this line -->");
                        writer.WriteLine("<!-- Vote above this line -->");
                        writer.WriteLine("{{Collapse bottom}}");
                        writer.WriteLine("");

                        ++i;
                    }

                    // Materialised once: the list is enumerated several times below.
                    var users = infos.GroupBy(info => info.Creator)
                        .Select(gr => new { Name = gr.Key, Nbr = gr.Count() })
                        .OrderByDescending(u => u.Nbr)
                        .ToList();

                    int nbrEntriesIfCapped = users.Sum(u => Math.Min(maxEntriesPerUser, u.Nbr));

                    errorWriter.WriteLine(users.Count + " users");
                    foreach (var user in users)
                    {
                        errorWriter.WriteLine(user.Name + " " + user.Nbr);
                        if (user.Nbr > maxEntriesPerUser)
                        {
                            // Ready-to-paste gallery of the user's entries, newest first.
                            errorWriter.WriteLine(" (<gallery>");
                            foreach (var f in infos.Where(z => z.Creator == user.Name).OrderByDescending(x => x.Uploaded))
                            {
                                errorWriter.WriteLine(f.FileName);
                            }
                            errorWriter.WriteLine("</gallery>)");
                        }
                    }

                    int nbrTakenDuringChallenge = infos.Count(info => info.Created >= minUploadDate && info.Created <= maxUploadDate);

                    errorWriter.WriteLine(infos.Count + " photos and " + nbrTakenDuringChallenge + " taken during challenge. " + nbrEntriesIfCapped + " if capped");

                    foreach (var user in users.Where(u => u.Nbr > maxEntriesPerUser))
                    {
                        string files = string.Join(", ", infos.Where(inf => inf.Creator == user.Name).Select(inf => string.Format("[[:File:{0}]]", inf.FileName)));

                        errorWriter.WriteLine(user.Name + " uploaded " + user.Nbr + " images: " + files);
                    }

                    if (draft)
                    {
                        writer.WriteLine("{{Discussion bottom}}");
                    }
                }
            }

            // Console.ReadKey throws InvalidOperationException when stdin is redirected
            // (e.g. when run from a script), so only pause for an interactive console.
            if (!Console.IsInputRedirected)
            {
                Console.WriteLine("Press a key");
                Console.ReadKey();
            }
        }

        // One shared client for all file description pages: creating an HttpClient per request
        // leaks sockets, and Wikimedia asks scripts to send a descriptive User-Agent header.
        private static readonly HttpClient FilePageClient = CreateFilePageClient();

        // Builds the shared client used by GetFileInfo and sets its User-Agent header.
        private static HttpClient CreateFilePageClient()
        {
            var client = new HttpClient();
            client.DefaultRequestHeaders.TryAddWithoutValidation(
                "User-Agent",
                "CreateVoting/1.0 (https://commons.wikimedia.org/wiki/Commons:Photo_challenge)");
            return client;
        }

        /// <summary>
        /// Downloads the description page of one file and scrapes the first upload time, the
        /// first uploader, the pixel dimensions and file size (JPEG only), the creation date
        /// and the "own work" marker from its HTML.
        /// </summary>
        /// <param name="fileName">File name without the "File:" prefix.</param>
        /// <param name="title">Caption to store with the entry.</param>
        /// <param name="errorWriter">Receives one line for every value that could not be found.</param>
        /// <returns>
        /// The collected information, or <c>null</c> when the page could not be downloaded.
        /// Values that are missing from the page keep their defaults (null dates, zero sizes).
        /// </returns>
        private static async Task<FileInfo> GetFileInfo(string fileName, string title, StreamWriter errorWriter)
        {
            var info = new FileInfo {FileName = fileName, Title = title};

            HttpResponseMessage response = null;
            try
            {
                response = await FilePageClient.GetAsync(info.Url);
                response.EnsureSuccessStatusCode();
            }
            catch (Exception e)
            {
                // Best effort: a file that cannot be fetched is reported and skipped by the caller.
                response?.Dispose();
                errorWriter.WriteLine(info.Url + " gave " + e.Message);
                return null;
            }

            using (response)
            using (Stream receiveStream = await response.Content.ReadAsStreamAsync())
            {
                var doc = new HtmlDocument();
                doc.Load(receiveStream, Encoding.UTF8);

                HtmlNode docNode = doc.DocumentNode;

                HtmlNodeCollection uploadedNodes =
                    docNode.SelectNodes(
                        "//table[@class='wikitable filehistory']/tr/td[contains(@style, 'white-space: nowrap;')]/a[contains(@href, 'upload.wikimedia.org')]");
                if (uploadedNodes != null)
                {
                    // The last matching row of the file history is taken as the first upload.
                    HtmlNode firstupload = uploadedNodes.Last();
                    // 12:53, 8 March 2014
                    string date = firstupload.InnerText;
                    int comma = date.IndexOf(',');
                    if (comma != -1)
                    {
                        // Reorder to "<date> <time>" before parsing.
                        date = date.Substring(comma + 1) + " " + date.Substring(0, comma);
                        DateTime uploaded;
                        // Invariant culture: the page text uses English month names, which a
                        // non-English machine culture would fail to parse.
                        if (DateTime.TryParse(date, CultureInfo.InvariantCulture, DateTimeStyles.None, out uploaded))
                        {
                            info.Uploaded = uploaded;
                        }
                    }
                }

                if (!info.Uploaded.HasValue)
                {
                    errorWriter.WriteLine(info.Url + " had no uploaded date time.");
                }

                HtmlNodeCollection uploadNodes =
                    docNode.SelectNodes(
                        "//table[@class='wikitable filehistory']/tr/td/a[contains(@class, 'mw-userlink')]");

                if (uploadNodes != null)
                {
                    // Last user link in the file history, i.e. the uploader of the first version.
                    HtmlNode firstContrib = uploadNodes.Last();
                    info.Creator = firstContrib.InnerText;
                }

                if (string.IsNullOrEmpty(info.Creator))
                {
                    errorWriter.WriteLine(info.Url + " had no creator.");
                }

                HtmlNode finfoNode = docNode.SelectNodes("//span[@class='fileInfo']")?.FirstOrDefault();

                if (finfoNode != null)
                {
                    //(900 × 657 pixels, file size: 117 KB, MIME type: image/jpeg)
                    string inner = finfoNode.InnerText;
                    if (inner.Contains("image/jpeg"))
                    {
                        // Skip the leading '(' and pick the values by word position.
                        string[] words = inner.Substring(1).Split(new[] { ' ' });
                        int width;
                        int height;
                        double size;

                        // TryParse plus a length check: an unexpected layout leaves the sizes at
                        // zero (reported below) instead of aborting the whole run.
                        if (words.Length >= 8 &&
                            int.TryParse(words[0].Replace(",", ""), NumberStyles.Integer,
                                CultureInfo.InvariantCulture, out width) &&
                            int.TryParse(words[2].Replace(",", ""), NumberStyles.Integer,
                                CultureInfo.InvariantCulture, out height) &&
                            double.TryParse(words[6].Replace(",", ""), NumberStyles.Float,
                                CultureInfo.InvariantCulture, out size))
                        {
                            string unit = words[7];
                            if (unit.StartsWith("KB", StringComparison.Ordinal))
                            {
                                size /= 1024.0;
                            }
                            else if (unit.StartsWith("bytes", StringComparison.Ordinal))
                            {
                                size /= 1024.0 * 1024.0;
                            }
                            else if (unit.StartsWith("GB", StringComparison.Ordinal))
                            {
                                size *= 1024.0;
                            }

                            info.Width = width;
                            info.Height = height;
                            info.FileSizeMB = size;
                        }
                    }
                    // Non-JPEG files keep Width, Height and FileSizeMB at their default of zero.
                }

                if (info.Width == 0)
                {
                    errorWriter.WriteLine(info.Url + " had no file info.");
                }

                // <time class="dtstart" datetime="2015-04-12 09:17:25">12 April 2015, 09:17:25</time>
                // <tr class="exif-datetimeoriginal">
                HtmlNodeCollection startNodes = docNode.SelectNodes("//time[@class='dtstart']");

                // Some images have object date before photo date
                HtmlNode startNode = startNodes == null ? null : startNodes.LastOrDefault();

                if (startNode != null)
                {
                    string timeStamp = startNode.GetAttributeValue("datetime", string.Empty);

                    DateTime created;
                    if (DateTime.TryParse(timeStamp, CultureInfo.InvariantCulture, DateTimeStyles.None, out created))
                    {
                        info.Created = created;
                    }
                }
                else
                {
                    // No structured date on the page: fall back to the EXIF "date original" row.
                    startNodes = docNode.SelectNodes("//tr[@class='exif-datetimeoriginal']/td");

                    startNode = startNodes == null ? null : startNodes.FirstOrDefault();

                    if (startNode != null)
                    {
                        DateTime created;
                        if (DateTime.TryParse(startNode.InnerText, CultureInfo.InvariantCulture,
                            DateTimeStyles.None, out created))
                        {
                            info.Created = created;
                        }
                    }
                }

                // A missing creation date is deliberately not reported in the error log.

                var ownWorkNodes = docNode.SelectNodes("//span[@class='int-own-work']");

                info.OwnWork = (ownWorkNodes != null);

                if (!info.OwnWork)
                {
                    errorWriter.WriteLine(info.Url + " may not be own work.");
                }
            }

            return info;
        }

        // Shared client for raw wikitext downloads. Wikimedia asks scripts to identify
        // themselves with a descriptive User-Agent header.
        private static readonly HttpClient WikiTextClient = CreateWikiTextClient();

        // Builds the shared client used by DownloadWikiFile and sets its User-Agent header.
        private static HttpClient CreateWikiTextClient()
        {
            var client = new HttpClient();
            client.DefaultRequestHeaders.TryAddWithoutValidation(
                "User-Agent",
                "CreateVoting/1.0 (https://commons.wikimedia.org/wiki/Commons:Photo_challenge)");
            return client;
        }

        /// <summary>
        /// Downloads a text resource and returns it split into lines.
        /// </summary>
        /// <param name="url">Address of the resource to download.</param>
        /// <param name="errorWriter">Receives a one-line description of any failure.</param>
        /// <returns>
        /// The lines of the response body, or <c>null</c> when the request failed, the status
        /// was not 200 OK, or the response did not declare a text content type.
        /// </returns>
        private static async Task<List<string>> DownloadWikiFile(string url, StreamWriter errorWriter)
        {
            HttpResponseMessage response = null;
            try
            {
                response = await WikiTextClient.GetAsync(url);
                response.EnsureSuccessStatusCode();
            }
            catch (Exception e)
            {
                response?.Dispose();
                errorWriter.WriteLine(url + " gave " + e.Message);
                return null;
            }

            using (response)
            {
                // The Content-Type header is optional, so ContentType may be null.
                string mediaType = response.Content.Headers.ContentType?.MediaType;
                bool isText = mediaType != null &&
                              mediaType.StartsWith("text", StringComparison.OrdinalIgnoreCase);

                // EnsureSuccessStatusCode has already rejected everything outside 2xx, so only
                // the other 2xx codes are filtered out here.
                if (response.StatusCode != HttpStatusCode.OK || !isText)
                {
                    errorWriter.WriteLine(url + " gave " + response.StatusCode);
                    return null;
                }

                var result = new List<string>();
                using (Stream inputStream = await response.Content.ReadAsStreamAsync())
                using (var reader = new StreamReader(inputStream))
                {
                    string line;
                    while ((line = await reader.ReadLineAsync()) != null)
                    {
                        result.Add(line);
                    }
                }

                return result;
            }
        }

        /// <summary>
        /// Everything known about one challenge entry, scraped from its file description page.
        /// Note: this nested type hides <c>System.IO.FileInfo</c> inside <c>Program</c>.
        /// </summary>
        public class FileInfo
        {
            /// <summary>File name without the "File:" prefix, with spaces rather than underscores.</summary>
            public string FileName { get; set; }

            /// <summary>Caption shown under the thumbnail.</summary>
            public string Title { get; set; }

            /// <summary>User name of the first uploader.</summary>
            public string Creator { get; set; }

            /// <summary>When the photo was taken, if the page states it.</summary>
            public DateTime? Created { get; set; }

            /// <summary>When the first version was uploaded, if it could be determined.</summary>
            public DateTime? Uploaded { get; set; }

            /// <summary>Width in pixels; 0 when unknown.</summary>
            public int Width { get; set; }

            /// <summary>Height in pixels; 0 when unknown.</summary>
            public int Height { get; set; }

            /// <summary>True when the page carries the "own work" marker.</summary>
            public bool OwnWork { get; set; }

            /// <summary>
            /// Returns the thumbnail width, in pixels, at which the image covers roughly
            /// <paramref name="sizePx"/> pixels of area while keeping its aspect ratio.
            /// </summary>
            /// <param name="sizePx">Target thumbnail area in pixels.</param>
            /// <returns>
            /// The scaled width. When the dimensions are unknown the image is assumed square;
            /// 0 is returned for a non-positive target area.
            /// </returns>
            public int WidthForSize(int sizePx)
            {
                if (sizePx <= 0)
                {
                    return 0;
                }

                if (Width <= 0 || Height <= 0)
                {
                    // Unknown dimensions would produce NaN, and casting NaN to int yields an
                    // unspecified value; fall back to a square thumbnail instead.
                    return (int)Math.Sqrt(sizePx);
                }

                // Multiply as double: Width * Height overflows int above ~2.1 gigapixels.
                double pixelCount = (double)Width * Height;
                return (int)(Width * Math.Sqrt(sizePx / pixelCount));
            }

            /// <summary>Image size in megapixels, rounded down to one decimal place.</summary>
            public string Megapixels
            {
                get
                {
                    // Multiply as double: Width * Height overflows int above ~2.1 gigapixels.
                    double mp = ((double)Width * Height) / 1000.0 / 1000.0;
                    double mpRound = Math.Floor(mp * 10.0) / 10.0;

                    // Invariant culture so the wikitext always uses '.' as decimal separator.
                    return mpRound.ToString("F1", CultureInfo.InvariantCulture);
                }
            }

            /// <summary>The file name with its last extension removed.</summary>
            public string FileWithoutExt
            {
                get
                {
                    int dot = FileName.LastIndexOf('.');
                    return (dot == -1) ? FileName : FileName.Substring(0, dot);
                }
            }

            /// <summary>
            /// Ratio of the longer side to the shorter side, rounded down to one decimal place.
            /// Not a finite number when a dimension is zero.
            /// </summary>
            public double AspectRatio
            {
                get
                {
                    double ratio = Width > Height ? ((double)Width / Height) : ((double)Height / Width);
                    return Math.Floor(ratio * 10.0) / 10.0;
                }
            }

            /// <summary>Aspect ratio as "x.x:1" for landscape images and "1:x.x" otherwise.</summary>
            public string AspectRatioText
            {
                get
                {
                    string ratio = AspectRatio.ToString("F1", CultureInfo.InvariantCulture);
                    return Width > Height ? (ratio + ":1") : ("1:" + ratio);
                }
            }

            /// <summary>Address of the file description page on Wikimedia Commons.</summary>
            public string Url
            {
                get
                {
                    // '?' would otherwise start the query string.
                    return "https://commons.wikimedia.org/wiki/File:" + FileName.Replace(' ', '_').Replace("?", "%3F");
                }
            }

            /// <summary>File size in megabytes; 0 when unknown.</summary>
            public double FileSizeMB { get; set; }
        }
    }
}