MediaWiki:FileAnalyzer.js

From Wikimedia Commons, the free media repository
Jump to navigation Jump to search
Note: After saving, you have to bypass your browser's cache to see the changes. Internet Explorer: press Ctrl-F5, Mozilla: hold down Shift while clicking Reload (or press Ctrl-Shift-R), Opera/Konqueror: press F5, Safari: hold down Shift + Alt while clicking Reload, Chrome: hold down Shift while clicking Reload.
/**
 * [[MediaWiki:FileAnalyzer.js]]
 *
 * Front End for: [[MediaWiki:WebWorker-FileAnalyzer.js]]
 * Analyzes files scanning for issues that could arise while uploading
 * including calculation of the SHA1 checksum which is then checked
 * against existing files via API
 * and against Rillke's toollabs-account.
 *
 */
/*jshint */
/*global jQuery:false, mediaWiki:false*/
(function ($, mw) {
	'use strict';

	var oldTitle = document.title,
		fa,
		getUrl;

	/**
	 * Build the absolute URL under which a wiki page is served as raw JavaScript.
	 *
	 * @param {string} title Page title, e.g. 'MediaWiki:WebWorker.js'
	 * @return {string} URL on the current host with action=raw and ctype=text/javascript
	 */
	getUrl = function (title) {
		var origin = location.protocol + '//' + location.host,
			script = mw.util.wikiScript(),
			query = $.param({
				action: 'raw',
				ctype: 'text/javascript',
				title: title
			});

		return origin + script + '?' + query;
	};
		
	// Public API object of the File Analyzer front end; also exposed as
	// mw.libs.FileAnalyzer.
	fa = mw.libs.FileAnalyzer = {
		// Version string of this front end
		version: '0.0.8.1',
		// Selector of the start container: createUI hides it, and the auto-starter
		// only launches when it is present in newly added page content
		targetSelector: '#com-fa-startcontainer',
		// True when 'sysop' is among the current user's groups (wgUserGroups);
		// $querySHA uses it to decide where deleted files are looked up
		userIsAdmin: $.inArray('sysop', mw.config.get('wgUserGroups')) > -1,

		dbTimestamp2LocalString: function( ts ) {
			var m = ts.match(/(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})/);
			return new Date(m[1], m[2], m[3], m[4], m[5], m[6]).toLocaleString();
		},

		$querySHA: function (sha1, size, log) {
			var $xhrs = [], $xhr;
			if (size !== undefined) size = Number(size);
			
			// Existing files whose revisions are on top
			$xhr = mw.libs.commons.api.$query({
				action: 'query',
				generator: 'allimages',
				gaisha1: sha1,
				prop: 'imageinfo',
				iiprop: 'size'
			}).done(function (r) {
				if (r.query && r.query.pages) {
					var $lis = $();
					$.each(r.query.pages, function (id, pg) {
						if (size && pg.imageinfo && Number(pg.imageinfo[0].size) !== size) return;
						$lis = $lis.add($('<a>').text(pg.title).attr('href', mw.util.getUrl(pg.title)));
					});
					log($('<div>').text("EXISTING FILES:").append($('<ul>').append($lis.wrap('<li>').parent())));
				} else {
					log("No files do EXIST having SHA1=" + sha1 + ".");
				}
			});
			$xhrs.push($xhr);
			
			// Overwritten files
			// and delted files for non-admins
			var params = {
				action: 'sha1lookup',
				sha1: sha1
			};
			if (!fa.userIsAdmin) {
				params.showdeleted = 1;
			}
			$xhr = $.getJSON('//tools.wmflabs.org/expose-data/jsonapi.php', params).done(function(r) {
				if (r.sha1lookup && $.isArray(r.sha1lookup.oldimage)) {
					var $lis;
					if (r.sha1lookup.oldimage.length) {
						$lis = $();
						$.each(r.sha1lookup.oldimage, function (i, img) {
							if (size && Number(img.oi_size) !== size) return;

							$lis = $lis.add($('<a>').text(
										img.oi_name.replace(/_/g, ' ') + 
										' Uploaded: ' + fa.dbTimestamp2LocalString( img.oi_timestamp )
									).attr('href', mw.util.getUrl('File:' + img.oi_name)));
						});
						log($('<div>').text("OLD FILES:").append($('<ul>').append($lis.wrap('<li>').parent())));
					} else {
						log("No files OVERWRITTEN having SHA1=" + sha1 + ".");
					}
					if (r.sha1lookup.filearchive && r.sha1lookup.filearchive.length) {
						$lis = $();
						$.each(r.sha1lookup.filearchive, function (i, img) {
							if (size && Number(img.fa_size) !== size) return;

							$lis = $lis.add($('<a>').addClass('new').text(
										img.fa_name.replace(/_/g, ' ') + 
										' Uploaded: ' + fa.dbTimestamp2LocalString( img.fa_timestamp )
									).attr('href', mw.util.getUrl('File:' + img.fa_name)));
						});
						log($('<div>').text("DELETED FILES:").append($('<ul>').append($lis.wrap('<li>').parent())));
					} else {
						if (!fa.userIsAdmin) log("No files DELETED having SHA1=" + sha1 + ".");
					}
				} else {
					log("Issue with the response from Rillke's toollabs account: 200 but no content.");
				}
			}).fail(function() {
				log("Server error @Rillke's tool account.");
			});
			$xhrs.push($xhr);
			
			// Deleted files
			if (fa.userIsAdmin) {
				$xhr = mw.libs.commons.api.$query({
					action: 'query',
					list: 'filearchive',
					fasha1: sha1,
					falimit: 'max',
					faprop: 'timestamp|size'
				}).done(function (r) {
					if (r.query && r.query.filearchive && r.query.filearchive.length) {
						var $lis = $();
						$.each(r.query.filearchive, function (id, fa) {
							if (size && Number(fa.size) !== size) return;
							$lis = $lis.add($('<a>').addClass('new').text(fa.title + ' Uploaded: ' + fa.timestamp).attr('href', mw.util.getUrl(fa.title)));
						});
						log($('<div>').text("DELETED FILES:").append($('<ul>').append($lis.wrap('<li>').parent())));
					} else {
						log("No files DELETED having SHA1=" + sha1 + ".");
					}
				});
				$xhrs.push($xhr);
			}
			
			return $.when.apply($, $xhrs);
		},
		
		addIssues: function(issues, log) {
			var $issueGroupT = $('<div>').addClass('warning'),
				$issueCollectionT = $('<div>').css('margin', '0.4em'),
				$issuePositionsT = $('<i>').css('margin', '0.4em'),
				lastIssueAt = 0,
				flatIssueList = [];
			
			/**
			* Add issue to a flat list of issues
			*/
			var addIssue = function(issueKey, pattern, pos, rangeAffected) {
				// It is only an issue, if it can be detected
				// it can be only detected if not broken in chunks, and
				// often only the first 1024 bytes are checked
				flatIssueList.push( $.extend({}, pos, { affects: rangeAffected }) );
				lastIssueAt = Math.max(lastIssueAt, pos.at);
			};
			
			$.each(issues, function(issueKey, issue) {
				var shouldAdd = false,
					$issueGroup = $issueGroupT.clone().text(issueKey);
				
				$.each(issue.searches, function(i, issueSearch) {
					var collection = [],
						$issueCollection = $issueCollectionT.clone().text('matching "' + issue.pattern[i] + '" @position:');
						
					$.each(issueSearch.matches, function(x, issueMatch) {
						shouldAdd = true;
						collection.push( issueMatch.at );
						addIssue( issueKey, issueSearch.term, issueMatch, issue.notBefore );
					});
					
					if (collection.length) {
						$issueCollection.append($issuePositionsT.clone().text(collection.join(', '))).appendTo($issueGroup);
					}
				});
				
				if (shouldAdd) {
					log($issueGroup);
				}
			});
			
			
			var url = getUrl('MediaWiki:WebWorker.js'),
				worker = new Worker(url);
			
			log( $('<span>').css('color', 'green').text("Computing possible chunk sizes for uploading") );
			worker.postMessage({
				operation: 'run',
				exec: 'analyzeIssueList',
				scripts: ['MediaWiki:WebWorker-IssueAnalyzer.js'],
				issueList: flatIssueList
			});
			
			worker.addEventListener('message', function (e) {
				var d = e.data;

				if (!d.reply) throw new Error('Incompatible reply: ', d);
				switch (d.reply) {
					case 'progress':
						// TODO: Provide link to chunked upload script
						log( $('<span>').css('color', 'green').text("Possible chunk size: " + d.data.possibleSize) );
						break;
					case 'Done':
						if (d.data.possibleSizes.length === 0) {
							log( $('<span>').css('color', 'green').text("No chunk size that would work found!") );
						}
						break;
				}
			});
		},

		$makeUI: function () {
			var $input = $('<input id="com-file-analyzer-fileinput" type="file" autofocus/>')
				.css({
					'text-align': 'center',
					'font-size': '300px',
					'cursor': 'pointer'
				})
				.appendTo('#com-drop-frame-fileinput'),
				$progress = $('<progress>')
					.attr({
						max: 100,
						value: 0
					})
					.css({
						'display': 'inline-block',
						'text-align': 'center',
						'width': '99%'
					})
					.text('Your browser does not support this tool. Upgrade to a modern browser, please.')
					.appendTo($('#com-drop-frame-progressbar').text('')),
				$output = $('<div>')
					.css({
						'font-weight': 'bold'
					})
					.text('Idle')
					.appendTo($('#com-drop-frame-log').text('')),
				$dragArrow = $('#com-drop-arrow'),
				$dropFrame = $('#com-drop-frame-accept-drop'),
				$dropFrameIdle = $('#com-drop-frame-idle'),
				$dropFrameProcessing = $('#com-drop-frame-processing');


			var $logline = $('<div>')
				.css({
					'border-top': '2px dashed #DDD',
					'margin': '4px 0'
				}),
				log = function ($content) {
					var $ll;
					if ($content.jquery) {
						$ll = $logline.clone().append($content).appendTo($output);
					} else {
						$ll = $logline.clone().text($content).appendTo($output);
					}
					$output.parent().clearQueue().animate({
						scrollTop: $output.parent().scrollTop() + $ll.position().top
					}, 800);
				};

			var processFile = function (file) {
				var url, worker;
				$dropFrame.off('drop', __ondrop).off('dragover', __ondragover);

				$dropFrameIdle.fadeOut();
				$dropFrameProcessing.fadeIn();

				log("Starting webworker.");
				document.title = 'File Analyzer - ' + mw.config.get('wgSiteName');
				url = getUrl('MediaWiki:WebWorker.js');
				worker = new Worker(url);
				
				log("Preparing analysis.");
				worker.postMessage({
					operation: 'run',
					exec: 'analyzeFile',
					scripts: ['MediaWiki:WebWorker-FileAnalyzer.js'],
					file: file
				});
				
				
				worker.addEventListener('message', function (e) {
					var d = e.data;

					if (!d.reply) throw new Error('Incompatible reply: ', d);
					switch (d.reply) {
						case 'progress':
							$progress.attr('value', d.data.percent);
							document.title = d.data.percent + "% File Analyzer - " + mw.config.get('wgSiteName');
							break;
						case 'executing':
							log("Analyzing file '" + file.name + "'");
							break;
						case 'Done':
							var __ready = function () {
								document.title = oldTitle;
								$dropFrameIdle.fadeIn();
								$dropFrameProcessing.fadeOut();
								$dropFrame.on('drop', __ondrop).on('dragover', __ondragover);
							};

							$progress.attr('value', 100);
							document.title = "100% File Analyzer - " + mw.config.get('wgSiteName');
							log($('<span>').append($('<span>').text("Checking checksum "), $('<i>').text(d.data.sha1), $('<span>').text(" against existing files.")));
							fa.$querySHA(d.data.sha1, file.size, log)
								.done(__ready)
								.fail(__ready);
								
							fa.addIssues(d.data.issues, log);
							break;
						case 'Failed':
							log($('<span>').attr('class', 'error').text(d.data.error));
							break;
						default:
							// console.log(d);
							break;
					}
				}, false);
			};

			var __ondrop = function (e) {
				var f = e.originalEvent.dataTransfer.files[0];
				e.preventDefault();
				$dragArrow.hide();

				if (f) processFile(f);
			};
			var __ondragover = function (e) {
				e.preventDefault();
				$dragArrow.show();
			};

			$dropFrame.on({
				drop: __ondrop,
				dragover: __ondragover,
				dragleave: function () {
					$dragArrow.hide();
				},
				dragend: function (e) {
					e.preventDefault();
					$dragArrow.hide();
				}
			});

			$input.change(function () {
				processFile($input[0].files[0]);
			});
			
			$('#com-sha1lookupcontainer').empty().append(fa.$getUI(log));
		},
		createUI: function () {
			$(fa.targetSelector).hide();
			$('#com-fa-analyzecontainer').show();
			fa.$makeUI();
		},
		// Returns UI for sha1lookup
		$getUI: function(log) {
			var $ui = $('<div>'),
				$sha1LookupContainer = $('<div>')
					.appendTo($ui),
				$sha1Label = $('<label for="com-sha1inputfield" style="width:21em; min-width=25%; display:inline-block"></label>')
					.text("Insert a SHA1 hash (hexadecimal): ")
					.appendTo($sha1LookupContainer),
				$sha1Input = $('<input id="com-sha1inputfield" size="40" pattern="^[a-fA-F0-9]{40}$" style="width:30em; min-width=50%" placeholder="e.g. C5729501FEBE7F9A33C74AD3C2ED1E7E5F318DBA"/>')
					.appendTo($sha1LookupContainer),
				$fileLookupContainer = $('<div>')
					.appendTo($ui),
				$fileLabel = $('<label for="com-filenameinputfield" style="width:21em; min-width=25%; display:inline-block"></label>')
					.text("Or find all existing duplicates of file: ")
					.appendTo($fileLookupContainer),
				$fileInput = $('<input id="com-sha1inputfield" pattern="^[^\\|<>]{3,}$" style="width:30em; min-width=50%" placeholder="e.g. Homemade waterfilter.jpg, confirm with [ENTER]"/>')
					.appendTo($fileLookupContainer);
					
			var delay = (function() {
				var to, delay = 300;
				return function(cb) {
					clearTimeout(to);
					to = setTimeout(function() {
						cb();
						delay += 100;
						delay = Math.min(delay, 1800);
					}, delay);
				};
			}());
			
			$sha1Input.on('input change keyup', function() {
				var oldVal = $sha1Input.val(),
					newVal = oldVal;
				
				newVal = newVal.replace(/[^a-fA-F0-9]/g, '');
				if (newVal !== oldVal) $sha1Input.val(newVal);
				
				if (!/^[a-fA-F0-9]{40}$/.test(newVal)) return;
				$sha1Input.prop('disabled', true);
				
				var __unlock = function() {
					$sha1Input.prop('disabled', false);
				};
				
				delay(function() {
					fa.$querySHA(newVal, undefined, log)
						.done(__unlock)
						.fail(__unlock);
				});
			});
			
			$fileInput.keyup(function(e) {
				if (e.which !== 13) return;

				var oldVal = $.trim($fileInput.val()),
					newVal = oldVal,
					$dlg = $('<ul>')
						.attr('title', 'Select file version to search for duplicates'),
					additions = 0,
					$xhrs = [], $xhr, lastAddition;
					
				if (!/^[Ff]ile:/.test(newVal)) newVal = 'File:' + newVal;
				if (newVal !== oldVal) $fileInput.val(newVal);
				$fileInput.prop('disabled', true);
				
				var __unlock = function() {
					$fileInput.prop('disabled', false);
					if (additions === 1) {
						fa.$querySHA(lastAddition.sha1, lastAddition.size, log);
						log('Looking for ' + lastAddition.sha1 + ' of size ' + lastAddition.size);
					} else if (additions > 1) {
						// Dialog
						mw.loader.using('jquery.ui', function() {
							$dlg.dialog({
								close: function() {
									$dlg.remove();
								},
								modal: true,
								width: 800
							});
						});
					}
				};

				var addII = function(timestamp, user, comment, sha1, size, exists) {
					additions++;
					lastAddition = {
						sha1: sha1,
						size: size
					};
					var $li = $('<li>'),
						$a = $('<a>')
							.attr('href', '#')
							.addClass(exists ? '' : 'new')
							.text(timestamp + ' -- User:' + user + ' -- Comment:' + comment + ' -- Exists:' + exists)
							.click(function(e) {
								e.preventDefault();
								log('Looking for ' + sha1 + ' of size ' + size);
								$dlg.dialog('close');
								fa.$querySHA(sha1, size, log);
							})
							.appendTo($li);
					$li.appendTo($dlg);
				};

				$xhr = mw.libs.commons.api.$query({
					action: 'query',
					prop: 'imageinfo',
					iiprop: 'size|sha1|user|timestamp|comment',
					iilimit: 'max',
					titles: newVal
				}).done(function (r) {
					if (r.query.pages) {
						$.each(r.query.pages, function(pid, pg) {
							if (!pg.imageinfo || !pg.imageinfo[0]) {
								return;
							}
							$.each(pg.imageinfo, function(i, ii) {
								addII(new Date(ii.timestamp).toLocaleString(), ii.user, ii.comment, ii.sha1, ii.size, true);
							});
						});
					}
				});
				$xhrs.push($xhr);
				$xhr = $.getJSON('//tools.wmflabs.org/expose-data/jsonapi.php', {
					action: 'getsha1',
					filename: newVal.replace(/^[Ff]ile:/, '')
				}).done(function(r) {
					$.each(r.getsha1.filearchive, function(i, img) {
						addII(fa.dbTimestamp2LocalString(img.fa_timestamp), img.fa_user_text, img.fa_description, img.fa_sha1, img.fa_size, false);
					});
				});
				$xhrs.push($xhr);
				$.when.apply($, $xhrs).done(__unlock).fail(__unlock);
			});

			return $ui;
		}
	};


	// Auto-starter
	if (mw.config.get('wgPageName') !== 'Commons:User_scripts/File_Analyzer') return;
	if (mw.config.get('wgAction') !== 'view') return;

	var launch = function ($c) {
		if ($(fa.targetSelector, $c).length) mw.loader.using(['ext.gadget.libAPI', 'mediawiki.util'], fa.createUI);
	};
	mw.hook('wikipage.content').add(launch);
}(jQuery, mediaWiki));