User:Gone Postal/Youtube4Commons.sh
This tool lets you download a video from YouTube in a form that is ready to be uploaded to Commons. It creates two files: the video itself, in WebM (VP8/VP9) format or, when that is not available, re-encoded into Ogg Theora format, and a text file with the wikitext information.
Installation
You will need the following tools already installed:
- youtube-dl
- ffmpeg2theora
- curl
- mkvpropedit (part of MKVToolNix)
After that, copy the code from the #Code section and paste it into a text file named `YouTube4Commons.sh`. Then place that file in a directory listed in your `PATH` and make it executable with `chmod +x YouTube4Commons.sh`.
Running
Go to the directory where you want the files to be saved and type at a command prompt:
YouTube4Commons.sh youtube-id
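Here `youtube-id` is the part of a YouTube video URL after `?v=`; it consists of 11 characters (letters, digits, `-` and `_`). A full video URL may be given instead of the id. Placing the optional `--no-download` flag before the id skips the video download and only writes the text file.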
Code
#!/bin/bash
#
# YouTube4Commons.sh - prepare a YouTube video for upload to Wikimedia Commons.
#
# Usage: YouTube4Commons.sh [--download|--no-download] <video-id-or-url>
#
# Collects the video's title, description, channel, publication date, licence,
# tags, genre and family-friendly flag; optionally downloads the video (WebM,
# or another format re-encoded into Ogg Theora when the WebM download fails);
# and writes the wikitext to "<title>.text" in the current directory.
#
# Requires curl and youtube-dl. Downloads additionally use mkvpropedit (WebM
# metadata) and ffmpeg2theora (Ogg fallback).
#
# `set -e` is deliberately not used: several commands are expected to fail
# (for example the WebM download) and are handled explicitly.

# Prints a message to stderr and terminates the script with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Prints $1 with every run of whitespace (including newlines) collapsed to a
# single space and leading/trailing whitespace removed. Unlike an unquoted
# `echo $var`, this never performs pathname expansion on the text.
collapse_whitespace() {
  local -a words=()
  # read returns non-zero at end of input when the delimiter is NUL.
  read -r -d '' -a words <<<"${1}" || true
  printf '%s\n' "${words[*]}"
}

# Prints the content attribute of the first <meta itemprop="$1" content="...">
# tag found in the global ${html}; returns non-zero when there is none.
meta_itemprop() {
  local pattern="<meta itemprop=\"${1}\" content=\"([^\"]+)\""
  [[ ${html} =~ ${pattern} ]] || return 1
  printf '%s\n' "${BASH_REMATCH[1]}"
}

for tool in curl youtube-dl; do
  command -v "${tool}" >/dev/null \
    || die "Required tool '${tool}' was not found in PATH."
done

# --- Command line ------------------------------------------------------------
DO_DOWNLOAD="true"
case "${1:-}" in
  --download)
    shift
    ;;
  --no-download)
    DO_DOWNLOAD="false"
    shift
    ;;
esac

videoId="${1:-}"
if [[ ${videoId} =~ ^https?: ]]; then
  echo "Getting ID from the URL…"
  videoId=$(youtube-dl --get-id "${videoId}")
fi
if [[ ! ${videoId} =~ ^[-_[:alnum:]]{11}$ ]]; then
  die "Cannot determine video id. Exiting."
fi

# Always passed quoted: the "?" would otherwise be a glob character.
videoUrl="https://www.youtube.com/watch?v=${videoId}"

# --- Retrieval ---------------------------------------------------------------
echo "[${videoId}] Retrieving JSON…"
json=$(curl --silent "https://www.youtube.com/oembed?url=${videoUrl}&format=json")
echo "[${videoId}] Retrieving HTML…"
html=$(curl --silent "${videoUrl}")
echo "[${videoId}] Retrieving video title…"
title=$(youtube-dl --get-title "${videoUrl}")
echo "[${videoId}] Retrieving video description…"
description=$(youtube-dl --get-description "${videoUrl}")

# --- Parsing -----------------------------------------------------------------
echo "[${videoId}] Parsing the received information…"
channelUrl=""
channelName=""
datePublished=""
tags=""
genre=""
familyFriendly=""

# Channel URL (the JSON may escape slashes as "\/")
if [[ ${json} =~ \"author_url\":\"([^\"]+)\" ]]; then
  channelUrl=$(printf '%s\n' "${BASH_REMATCH[1]}" | sed 's@\\/@/@g')
else
  echo "[${videoId}] Channel URL is not found."
fi

# Channel Name
if [[ ${json} =~ \"author_name\":\"([^\"]+)\" ]]; then
  # %b decodes backslash escapes such as \uXXXX without treating the remote
  # text as a printf format string (a "%" in the name used to break it).
  channelName=$(printf '%b' "${BASH_REMATCH[1]}")
else
  echo "[${videoId}] Channel Name is not found."
fi

# Title: replace "||", "/" and brackets/braces with look-alike characters
# (presumably because they are not usable in file names on Commons), then let
# youtube-dl turn the result into a file name.
safeTitle=$(
  collapse_whitespace "${title}" \
    | sed -e 's@||@‖@g' -e 's@/@⁄@g' \
    | tr '[]{}' '()()'
)
# A literal "%" must be written "%%" inside a youtube-dl output template.
safeTitle=$(youtube-dl --get-filename "${videoUrl}" -o "${safeTitle//\%/%%}")
if [[ -z ${safeTitle} ]]; then
  echo "[${videoId}] Could not derive a file name from the title, using the video id." >&2
  safeTitle="${videoId}"
fi
outputTemplate=${safeTitle//\%/%%}

# Date Published
if ! datePublished=$(meta_itemprop datePublished); then
  echo "[${videoId}] Date Published is not found."
fi

# Date Uploaded is not read: YouTube's "uploadDate" meta tag merely duplicates
# the publication date.

# Description
if [[ -z ${description} ]]; then
  echo "[${videoId}] Description was empty, using title."
  description="${title}"
else
  # Expand goo.gl short links. The text is consumed left to right so that
  # every iteration makes progress even when a link cannot be resolved, and
  # no pattern substitution is involved, so a "&" in the resolved URL is
  # inserted literally.
  remaining="${description}"
  description=""
  while [[ ${remaining} =~ (https?://goo\.gl/[[:alnum:]]{6}) ]]; do
    shortUrl="${BASH_REMATCH[1]}"
    correctUrl=$(curl --location --silent --output /dev/null \
      --write-out '%{url_effective}' "${shortUrl}")
    if [[ -z ${correctUrl} ]]; then
      echo "[${videoId}] Could not resolve ${shortUrl}, keeping it as is." >&2
      correctUrl="${shortUrl}"
    else
      echo "[${videoId}] Changing ${shortUrl} into ${correctUrl}"
    fi
    textBefore=${remaining%%"${shortUrl}"*}
    description+="${textBefore}${correctUrl}"
    remaining=${remaining#*"${shortUrl}"}
  done
  description+="${remaining}"

  # Turn youtu.be short links into full watch URLs.
  description=$(
    printf '%s\n' "${description}" \
      | sed -E 's@https?://youtu\.be/@https://www.youtube.com/watch?v=@g'
  )
fi

# Licence
if [[ ${html} == *'https://www.youtube.com/t/creative_commons'* ]]; then
  echo "[${videoId}] Found CC-BY licence"
  licenceTemplate="{{YouTube CC-BY|${channelName}}}"
else
  echo "[${videoId}] Found only Standard Youtube License"
  licenceTemplate="{{Standard YouTube License}}"
fi

# Tags: joined as "tag1 | tag2 | …"
tagMarker='<meta property="og:video:tag" content='
if [[ ${html} == *"${tagMarker}"* ]]; then
  tags=$(
    grep -F -- "${tagMarker}" <<<"${html}" \
      | sed 's/^[[:space:]]*//' \
      | sed -z -e 's/<meta property="og:video:tag" content="\([^"]*\)">/\1 |/g'
  )
  tags=$(collapse_whitespace "${tags}") # destroying newlines
  tags=${tags%%|}                       # removing last |
fi

# Genre and Family Friendly (left empty when the tag is absent)
genre=$(meta_itemprop genre)
familyFriendly=$(meta_itemprop isFamilyFriendly)

# --- Download ----------------------------------------------------------------
if [[ ${DO_DOWNLOAD} == "true" ]]; then
  echo "[${videoId}] Attempting to download Webm…"
  if youtube-dl --format 'bestvideo[ext=webm]+bestaudio[ext=webm]/best[ext=webm]' \
    "${videoUrl}" -o "${outputTemplate}.webm"; then
    echo "[${videoId}] Setting metadata…"
    dateEncodedISO=$(date --reference="${safeTitle}.webm" --utc --iso-8601=seconds)
    mkvpropedit "${safeTitle}.webm" --set title="${title}" \
      --set date="${dateEncodedISO}" --add-track-statistics-tags
  else
    echo "[${videoId}] Downloading non-free format and with reencoding into Ogg…"
    tempFile=$(youtube-dl "${videoUrl}" -o "${outputTemplate}.%(ext)s" --get-filename)
    youtube-dl "${videoUrl}" -o "${outputTemplate}.%(ext)s"
    # temporary work-around due to https://github.com/ytdl-org/youtube-dl/issues/5710
    if [[ ! -e ${tempFile} ]]; then
      tempFile="$(basename -- "${tempFile}" .mp4).mkv"
    fi
    # Remove the intermediate file only after a successful conversion, so a
    # failed re-encode does not destroy the only downloaded copy.
    if [[ -e ${tempFile} ]] \
      && ffmpeg2theora -o "${safeTitle}.ogv" -v 6 --optimize -a 3 \
        --artist "${channelName}" --title "${title}" \
        --date "${datePublished}" --contact "${channelUrl}" "${tempFile}"; then
      rm -- "${tempFile}"
    else
      echo "[${videoId}] Reencoding into Ogg failed; any downloaded file was kept." >&2
    fi
  fi
else
  echo "Skipping download"
fi

# --- Information file --------------------------------------------------------
echo "[${videoId}] Writing the information file…"
# printf '%s' writes the text verbatim and without a trailing newline.
printf '%s' "== {{int:filedesc}} ==
{{Information
|description={{en|1=${description}}}
|date={{published on|${datePublished}|cat=yes|platform=YouTube|location=}}
|source={{From YouTube|1=${videoId}|2=${title}}}
|author=[${channelUrl} ${channelName}]
|permission=
|other_versions=
|other_fields=
{{information field|name=Social network tags|value={{flatlist|{{site Tags|YouTube|${tags}}} }} }}
{{information field|name=Genre|value=${genre} }}
{{information field|name=Family Friendly|value=${familyFriendly} }}
}}
==Licence and copyright information==
${licenceTemplate}
{{YouTubeReview}}
[[Category:Videos needing display resolution category]]" > "${safeTitle}.text"