Trappist the monk (talk | contribs) m →description: ce; |
Trappist the monk (talk | contribs) →script: update script; |
||
Line 36:
// when assigned value is 'yes', 'y', 'true', replace assigned value with 'dead'
// when assigned value is 'no', replace assigned value with 'live'
// when empty, and when |archive-url= is empty, delete
// when empty, and when |archive-url= has a value, retain but do not fill
// use Wikisearch: insource:/\| *dead\-?url *= *[^\|\}]/
Line 47 ⟶ 48:
string pattern;
int fixed_count = 0;
int deleted_count = 0;
//---------------------------< S T A R T >--------------------------------------------------------------------
ArticleText = hide (ArticleText, IS_CS1, true); // hide all templates that aren't cs1
Line 62 ⟶ 63:
// are changed and then deleted because we don't want to leave the deprecated parameter in articles to be copied
// and 'filled in' by well meaning editors.
//
// When |archive-url= has a value and |url-status= is present but empty, leave |url-status= in place.
//
Line 74 ⟶ 77:
pattern = @"dead\-?url";
fixed_template = Regex.Replace (raw_template, pattern, "url-status"); // replace parameter name
pattern = @"(url\-status\s*=\s*)(?:yes|y|true)";
fixed_template = Regex.Replace (fixed_template, pattern, "$1dead"); // replace 'yes', 'y', 'true' with 'dead'
// fixed_template = Regex.Replace (fixed_template, pattern, "$1"); // replace 'yes', 'y', 'true' with 'dead'▼
pattern = @"(url\-status\s*=\s*)no";
fixed_template = Regex.Replace (fixed_template, pattern, "$1live"); // replace 'no' with 'live'
pattern = @"\|\s*archive\-?url\s*=\s*[/\w]";
fixed_template = empty_param_remove (fixed_template); // remove all empty parameters from this template▼
if (Regex.Match (fixed_template, pattern).Success) // if |archive-url= is present and has a value
{▼
pattern = @"(\|\s*url\-status\s*=)(\s*[\|\}])";
fixed_template = Regex.Replace (fixed_template, pattern, "$1__3MP7Y__$2"); // if |url-status= is empty add empty secret word
}
▲ fixed_template = empty_param_remove (fixed_template); // remove all empty parameters from this template
pattern = @"__3MP7Y__";
▲
pattern = @"url\-status";
Line 92 ⟶ 102:
else
fixed_count++;
return fixed_template;
});
Line 101 ⟶ 111:
ArticleText = unhide (ArticleText, true); // unhide all that is hidden
if ((0 == deleted_count) && (0 == fixed_count))
{
Line 111 ⟶ 121:
Summary = Summary + @" replaced (" + fixed_count + @"×) / removed (" + deleted_count + @"×) deprecated |dead-url= and |deadurl= with |url-status=;";
return ArticleText;
}
Line 125 ⟶ 135:
private string hide (string ArticleText, string dont_hide, bool wikilinks)
{
string pattern = @"\{\{(?!\s*" + dont_hide + @")
{
ArticleText = Regex.Replace(ArticleText, pattern, "__0P3N__$1__CL0S3__");▼
ArticleText = Regex.Replace(ArticleText, pattern,
delegate(Match match)
{
string fixed_template; // a hidden template is assembled here
string raw_template = match.Groups[0].Value; // the whole template
pattern = @"\{\{"; // hide the opening {{
fixed_template = Regex.Replace (raw_template, pattern, "__0P3N__");
pattern = @"\}\}"; // hide the closing }}
fixed_template = Regex.Replace (fixed_template, pattern, "__CL0S3__");
pattern = @"\|"; // and hide the pipes
fixed_template = Regex.Replace (fixed_template, pattern, "__P1P3__");
return fixed_template;
});
}
pattern = @"([^\{])\{([^\{])"; // single opening curly brace
pattern = @"([^\}])\}([^\}])"; // single closing curly brace
ArticleText = Regex.Replace(ArticleText, pattern, "$1__CCU!21Y__$2");
if (wikilinks)
{
Line 154 ⟶ 189:
ArticleText = Regex.Replace(ArticleText, @"__P1P3__", "|"); // UNHIDE: replace __P1P3__ with |
}
ArticleText = Regex.Replace(ArticleText, @"__0CU!21Y__", "{"); // UNHIDE: replace __0CU!21Y__ with {
ArticleText = Regex.Replace(ArticleText, @"__CCU!21Y__", "}"); // UNHIDE: replace __CCU!21Y__ with }
ArticleText = Regex.Replace(ArticleText, @"__0P3N__", "{{"); // UNHIDE: replace __0P3N__ with {{
Line 164 ⟶ 202:
//---------------------------< E M P T Y _ P A R A M _ R E M O V E >------------------------------------------
//
// This function removes all empty named parameters from a template, attempting to leave what remains the same form.
//
// this is a multi-step process that attempts to handle most of the vagaries of how templates are written in
// wikitext. In general there are three basic 'styles': horizontal – all parameters written on a single
// line of text, vertical – all parameter written singly one-to-a-line, and a mix of the two – multiple lines
// where each has one or more parameters.
//
// 1. where the parameter name & '=' are on one line and the value on a following line, put the value on the same line as the '='
// 2. for mixed, when empties are followed by new line; remove the empty but leave the newline
// 3. for any, empties are followed by pipe closing }; remove the empty but leave the | or }
// 4. the preceding steps can leave blank lines; remove the blank lines
//
private string empty_param_remove (string template)
{
string pattern = @"(\|[^=]+=[ \
while (Regex.Match(template, pattern).Success) // put them on the same line
template = Regex.Replace(template, pattern, "$1");
pattern = @"\|[^=]+=[ \t]*([\r\n]+)"; // empty followed by new line
while (Regex.Match(template, pattern).Success)
template = Regex.Replace(template, pattern, "$1");
pattern = @"\|[^=]+=\s*([\|\}])"; // empty followed by pipe or at end of template
while (Regex.Match(template, pattern).Success)
template = Regex.Replace(template, pattern, "$1");
pattern = @"([\r\n]+)[ \t]*[\r\n]+"; // close up multiple new lines
while (Regex.Match(template, pattern).Success)
template = Regex.Replace(template, pattern, "$1");
|
Revision as of 21:35, 4 June 2019
The next version of the Module:Citation/CS1 suite will deprecate |dead-url=
and |deadurl=
because these two parameters violate the nominal standard that says that parameters ending in -url
hold a url as a value.
Wikitext | {{cite book
|
---|---|
Live | Title. Archived from the original on 2015-05-19. {{cite book}} : Unknown parameter |dead-url= ignored (|url-status= suggested) (help)
|
Sandbox | Title. Archived from the original on 2015-05-19. {{cite book}} : Unknown parameter |dead-url= ignored (|url-status= suggested) (help)
|
Wikitext | {{cite book
|
---|---|
Live | Title. Archived from the original on 2015-05-19. |
Sandbox | Title. Archived from the original on 2015-05-19. |
The purpose of task 15 is to replace various combinations of |dead-url=
and |deadurl=
and their associated keywords with |url-status=
and its appropriate keywords.
description
|dead-url=
and |deadurl=
accept a limited set of keywords that control the rendering of cs1|2 citation templates that have archive urls. The keywords that concern this task are:
yes, y, true, no
The remaining keywords retain their meaning and purpose:
unfit, usurped, bot: unknown
Because |url-status=no and |url-status=yes (and the other 'positive' keywords) are nonsensical, live (replacing no) and dead (replacing yes, y, and true) have been assigned to this parameter.
Task 15 searches for templates that use either of the |dead-url=
and |deadurl=
parameters (with or without assigned keyword) and then:
- renames the parameter to url-status
- replaces the assigned keyword no with live, and replaces the assigned keywords yes, y, and true with dead; keywords unfit, usurped, and bot: unknown are retained
- deletes all empty parameters (will delete an empty |url-status= parameter, except that the script retains an empty |url-status= when |archive-url= has a value)
|dead-url=
and |deadurl=
without an assigned keyword are intentionally included in this process so that the deprecated, and ultimately unsupported, parameters don't linger in article space.
Still to be decided: dead is the default value when |url-status= is omitted or empty. As such, |url-status=dead is redundant, so it may be appropriate to simply delete |dead-url= and |deadurl= when they hold yes, y, or true.
edit summaries
Task 15 writes an edit summary message that tallies the number of replacements and the number of deletions. The message has the form:
- replaced (n×) / removed (n×) deprecated |dead-url= and |deadurl= with |url-status=;
the edit summary has a link to this page.
ancillary tasks
Deletes all empty parameters from templates that are repaired.
This task does not do awb general fixes.
script
// remove, replace |deadurl= and |dead-url= with |url-status=
// when assigned value is 'yes', 'y', 'true', replace assigned value with 'dead'
// when assigned value is 'no', replace assigned value with 'live'
// when empty, and when |archive-url= is empty, delete
// when empty, and when |archive-url= has a value, retain but do not fill
// use Wikisearch: insource:/\| *dead\-?url *= *[^\|\}]/
// Processes one article: renames the deprecated |dead-url= / |deadurl= parameters of cs1|2 templates to
// |url-status=, translates their keywords, removes empty parameters from every template it touches, and
// builds the edit summary.
//
//   ArticleText    the article wikitext; the (possibly modified) wikitext is returned
//   ArticleTitle   not used by this task
//   wikiNamespace  not used by this task
//   Summary        receives the edit summary; empty when nothing was changed
//   Skip           receives true when nothing was changed, false otherwise
//
// NOTE(review): the signature presumably matches the host tool's custom-module entry point -- confirm before changing it.
public string ProcessArticle(string ArticleText, string ArticleTitle, int wikiNamespace, out string Summary, out bool Skip)
{
    Skip = false;                                   // set true below when no template was changed

    string IS_CS1 = @"(?:[Cc]ite\s*(?=(?:AV media(?: notes)?)|[Aa][Vv] media|[Aa][Vv] media notes|article|ar[Xx]iv|biorxiv|book|conference|document|encyclopa?edia|episode|interview|journal|magazine|mailing ?list|manual|newspaper|(?:news(?!group|paper))|paper|podcast|press release|report|serial|sign|speech|techreport|thesis|video|web)|[Cc]itation|[Cc]ite(?=\s*\|))";

    int fixed_count = 0;                            // templates that end up with |url-status=
    int deleted_count = 0;                          // templates where the renamed parameter was empty and removed

//---------------------------< S T A R T >--------------------------------------------------------------------

    ArticleText = hide (ArticleText, IS_CS1, true); // hide all templates that aren't cs1 templates & hide wikilinks

//---------------------------< R E N A M E   D E A D U R L >--------------------------------------------------
//
// renames |deadurl= and |dead-url= to |url-status=; replace assigned values.  Empty |deadurl= and |dead-url=
// are changed and then deleted because we don't want to leave the deprecated parameter in articles to be copied
// and 'filled in' by well meaning editors.
//
// When |archive-url= has a value and |url-status= is present but empty, leave |url-status= in place.
//

    // cs1|2 template has one of |dead-url= or |deadurl= with or without a value
    string template_pattern = @"\{\{\s*" + IS_CS1 + @"[^}]*\|\s*dead\-?url[^\}]+\}\}";

    ArticleText = Regex.Replace (ArticleText, template_pattern,
        delegate (Match match)
        {
            string raw_template = match.Groups[0].Value;    // the whole citation template

            // Rename the parameter only where it is a parameter name (pipe before, '=' after) so that
            // 'deadurl' / 'dead-url' occurring inside a url or other value is left untouched.
            string fixed_template = Regex.Replace (raw_template, @"(\|\s*)dead\-?url(?=\s*=)", "$1url-status");

            if (fixed_template == raw_template)
            {
                return raw_template;                        // nothing to rename; leave the template alone and uncounted
            }

            // replace 'yes', 'y', 'true' with 'dead'; \b keeps longer words that merely start with a keyword intact
            fixed_template = Regex.Replace (fixed_template, @"(\|\s*url\-status\s*=\s*)(?:yes|y|true)\b", "$1dead");

            // replace 'no' with 'live'
            fixed_template = Regex.Replace (fixed_template, @"(\|\s*url\-status\s*=\s*)no\b", "$1live");

            // if |archive-url= is present and has a value, protect an empty |url-status= from removal
            if (Regex.IsMatch (fixed_template, @"\|\s*archive\-?url\s*=\s*[/\w]"))
            {
                fixed_template = Regex.Replace (fixed_template, @"(\|\s*url\-status\s*=)(\s*[\|\}])", "$1__3MP7Y__$2");
            }

            fixed_template = empty_param_remove (fixed_template);       // remove all empty parameters from this template
            fixed_template = fixed_template.Replace ("__3MP7Y__", "");  // remove the placeholder again

            if (Regex.IsMatch (fixed_template, @"\|\s*url\-status\s*="))
            {
                fixed_count++;                              // |url-status= survived: this was a replacement
            }
            else
            {
                deleted_count++;                            // the renamed parameter was empty and has been removed
            }

            return fixed_template;
        });

//---------------------------< F I N I S H >------------------------------------------------------------------

    ArticleText = unhide (ArticleText, true);       // unhide all that is hidden

    if ((0 == deleted_count) && (0 == fixed_count))
    {
        Summary = @"";                              // nothing changed: no summary, skip the article
        Skip = true;
    }
    else
    {
        Summary = "[[User:Monkbot/task 15: remove replace deprecated dead-url params|Task 15]] (developmental testing):"
            + @" replaced (" + fixed_count + @"×) / removed (" + deleted_count + @"×) deprecated |dead-url= and |deadurl= with |url-status=;";
    }

    return ArticleText;
}
//===========================<< S U P P O R T >>==============================================================
//---------------------------< H I D E >----------------------------------------------------------------------
//
// HIDE TEMPLATES: find templates that are not <dont_hide>; replace the opening {{ with __0P3N__ and the closing }} with __CL0S3__
//
// Replaces wiki markup that would confuse the later template regexes with placeholder tokens.
//
//   ArticleText  the wikitext to process; the processed text is returned
//   dont_hide    regex fragment matching the names of templates that must stay visible
//   wikilinks    when true, [[...]] wikilinks are hidden as well
//
// Templates that contain no nested braces and whose name does not match <dont_hide> have their {{, }} and |
// replaced with __0P3N__, __CL0S3__ and __P1P3__.  Single (unpaired) curly braces become __0CU!21Y__ and
// __CCU!21Y__.  unhide() reverses these substitutions.
private string hide (string ArticleText, string dont_hide, bool wikilinks)
{
    string template_pattern = @"\{\{(?!\s*" + dont_hide + @")[^\{\}]*\}\}";

    ArticleText = Regex.Replace (ArticleText, template_pattern,
        delegate (Match match)
        {
            string hidden_template = match.Groups[0].Value;                     // the whole template
            hidden_template = hidden_template.Replace ("{{", "__0P3N__");       // hide the opening {{
            hidden_template = hidden_template.Replace ("}}", "__CL0S3__");      // hide the closing }}
            hidden_template = hidden_template.Replace ("|", "__P1P3__");        // and hide the pipes
            return hidden_template;
        });

    // Lookarounds do not consume the neighbouring characters, so a single brace at the very start or end of
    // the text, and single braces separated by only one character, are all hidden.
    ArticleText = Regex.Replace (ArticleText, @"(?<!\{)\{(?!\{)", "__0CU!21Y__");    // single opening curly brace
    ArticleText = Regex.Replace (ArticleText, @"(?<!\})\}(?!\})", "__CCU!21Y__");    // single closing curly brace

    if (wikilinks)
    {
        // HIDE complex wikilinks: [[article title|label]] to __WL1NK_O__article title__P1P3__label__WL1NK_C__
        // ([[File: and [[Image: links are excluded here; wikilinks inside them can be confusing)
        ArticleText = Regex.Replace (ArticleText, @"\[\[(?![Ff]ile|[Ii]mage)([^\|\]]+)\|([^\]]+)\]\]", "__WL1NK_O__$1__P1P3__$2__WL1NK_C__");

        // HIDE simple wikilinks: [[article title]] to __WL1NK_O__article title__WL1NK_C__
        ArticleText = Regex.Replace (ArticleText, @"\[\[([^\]]+)\]\]", "__WL1NK_O__$1__WL1NK_C__");
    }

    return ArticleText;
}
//---------------------------< U N H I D E >------------------------------------------------------------------
//
// UNHIDE TEMPLATES: find templates that are hidden; replace the 'hide' strings with the appropriate wiki markup
//
// Restores the wiki markup that hide() replaced with placeholder tokens.
//
//   ArticleText  the text containing placeholder tokens; the restored text is returned
//   wikilinks    when true, the wikilink tokens __WL1NK_O__ / __WL1NK_C__ are restored to [[ and ]]
//
// __P1P3__ is always restored: hide() writes it into every hidden template regardless of its own
// <wikilinks> argument, so restoring it only when <wikilinks> is true would leave it in the templates.
private string unhide (string ArticleText, bool wikilinks)
{
    if (wikilinks)
    {
        ArticleText = ArticleText.Replace ("__WL1NK_O__", "[[");    // UNHIDE: replace __WL1NK_O__ with [[
        ArticleText = ArticleText.Replace ("__WL1NK_C__", "]]");    // UNHIDE: replace __WL1NK_C__ with ]]
    }

    ArticleText = ArticleText.Replace ("__P1P3__", "|");            // UNHIDE: replace __P1P3__ with |
    ArticleText = ArticleText.Replace ("__0CU!21Y__", "{");         // UNHIDE: replace __0CU!21Y__ with {
    ArticleText = ArticleText.Replace ("__CCU!21Y__", "}");         // UNHIDE: replace __CCU!21Y__ with }
    ArticleText = ArticleText.Replace ("__0P3N__", "{{");           // UNHIDE: replace __0P3N__ with {{
    ArticleText = ArticleText.Replace ("__CL0S3__", "}}");          // UNHIDE: replace __CL0S3__ with }}

    return ArticleText;
}
//---------------------------< E M P T Y _ P A R A M _ R E M O V E >------------------------------------------
//
// This function removes all empty named parameters from a template, attempting to leave what remains the same form.
//
// this is a multi-step process that attempts to handle most of the vagaries of how templates are written in
// wikitext. In general there are three basic 'styles': horizontal – all parameters written on a single
// line of text, vertical – all parameter written singly one-to-a-line, and a mix of the two – multiple lines
// where each has one or more parameters.
//
// 1. where the parameter name & '=' are on one line and the value on a following line, put the value on the same line as the '='
// 2. for mixed, when empties are followed by new line; remove the empty but leave the newline
// 3. for any, when empties are followed by a pipe or a closing }; remove the empty but leave the | or }
// 4. the preceding steps can leave blank lines; remove the blank lines
//
// Removes every empty named parameter (|name= with no value) from <template> and returns the result.
//
// The parameter-name part of each pattern is [^=\|\}]+ so that a match can never start at an earlier pipe and
// run across it: with a plain [^=]+ the text '|foo|title=|' matched as a single empty parameter and the
// positional parameter 'foo' was deleted along with the empty |title=.
//
// Each step is repeated until it no longer matches because one pass can consume the pipe or newline that the
// next empty parameter needs as its own delimiter.
private string empty_param_remove (string template)
{
    // 1. parameter name & '=' on one line, value on a following line: put them on the same line
    string pattern = @"(\|[^=\|\}]+=[ \t]*)[\r\n]+(?!\s*[\|\}])";
    while (Regex.IsMatch (template, pattern))
    {
        template = Regex.Replace (template, pattern, "$1");
    }

    // 2. empty followed by new line: remove the empty but leave the newline
    pattern = @"\|[^=\|\}]+=[ \t]*([\r\n]+)";
    while (Regex.IsMatch (template, pattern))
    {
        template = Regex.Replace (template, pattern, "$1");
    }

    // 3. empty followed by pipe or at end of template: remove the empty but leave the | or }
    pattern = @"\|[^=\|\}]+=\s*([\|\}])";
    while (Regex.IsMatch (template, pattern))
    {
        template = Regex.Replace (template, pattern, "$1");
    }

    // 4. the preceding steps can leave blank lines: close up multiple new lines
    pattern = @"([\r\n]+)[ \t]*[\r\n]+";
    while (Regex.IsMatch (template, pattern))
    {
        template = Regex.Replace (template, pattern, "$1");
    }

    return template;
}