Trappist the monk (talk | contribs) m →description: ce; |
Trappist the monk (talk | contribs) →script: update script; |
||
Line 36: | Line 36: | ||
// when assigned value is 'yes', 'y', 'true', replace assigned value with 'dead' |
// when assigned value is 'yes', 'y', 'true', replace assigned value with 'dead' |
||
// when assigned value is 'no', replace assigned value with 'live' |
// when assigned value is 'no', replace assigned value with 'live' |
||
// when empty, delete |
// when empty, and when |archive-url= is empty, delete |
||
// when empty, and when |archive-url= has a value, retain but do not fill |
|||
// use Wikisearch: insource:/\| *dead\-?url *= *[^\|\}]/ |
// use Wikisearch: insource:/\| *dead\-?url *= *[^\|\}]/ |
||
Line 47: | Line 48: | ||
string pattern; |
string pattern; |
||
int fixed_count = 0; |
int fixed_count = 0; |
||
int deleted_count = 0; |
int deleted_count = 0; |
||
//---------------------------< S T A R T >-------------------------------------------------------------------- |
//---------------------------< S T A R T >-------------------------------------------------------------------- |
||
ArticleText = hide (ArticleText, IS_CS1, true); // hide all templates that aren't cs1 |
ArticleText = hide (ArticleText, IS_CS1, true); // hide all templates that aren't cs1 templates & hide wikilinks |
||
Line 62: | Line 63: | ||
// are changed and then deleted because we don't want to leave the deprecated parameter in articles to be copied |
// are changed and then deleted because we don't want to leave the deprecated parameter in articles to be copied |
||
// and 'filled in' by well meaning editors. |
// and 'filled in' by well meaning editors. |
||
// |
|||
// When |archive-url= has a value and |url-status= is present but empty, leave |url-status= in place. |
|||
// |
// |
||
Line 74: | Line 77: | ||
pattern = @"dead\-?url"; |
pattern = @"dead\-?url"; |
||
fixed_template = Regex.Replace (raw_template, pattern, "url-status"); // replace |
fixed_template = Regex.Replace (raw_template, pattern, "url-status"); // replace parameter name |
||
pattern = @"(url\-status\s*=\s*)(?:yes|y|true)"; |
pattern = @"(url\-status\s*=\s*)(?:yes|y|true)"; |
||
fixed_template = Regex.Replace (fixed_template, pattern, "$1dead"); // replace 'yes', 'y', 'true' with 'dead' |
fixed_template = Regex.Replace (fixed_template, pattern, "$1dead"); // replace 'yes', 'y', 'true' with 'dead' |
||
// alt version, remove 'yes', 'y', 'true' so that the parameter will be deleted; |dead-url=yes is the default state |
|||
⚫ | |||
pattern = @"(url\-status\s*=\s*)no"; |
pattern = @"(url\-status\s*=\s*)no"; |
||
fixed_template = Regex.Replace (fixed_template, pattern, "$1live"); // replace 'no' with 'live' |
fixed_template = Regex.Replace (fixed_template, pattern, "$1live"); // replace 'no' with 'live' |
||
pattern = @"\|\s*archive\-?url\s*=\s*[/\w]"; |
|||
⚫ | |||
if (Regex.Match (fixed_template, pattern).Success) // if |archive-url= is present and has a value |
|||
⚫ | |||
pattern = @"(\|\s*url\-status\s*=)(\s*[\|\}])"; |
|||
fixed_template = Regex.Replace (fixed_template, pattern, "$1__3MP7Y__$2"); // if |url-status= is empty add empty secret word |
|||
} |
|||
⚫ | |||
pattern = @"__3MP7Y__"; |
|||
⚫ | |||
pattern = @"url\-status"; |
pattern = @"url\-status"; |
||
Line 92: | Line 102: | ||
else |
else |
||
fixed_count++; |
fixed_count++; |
||
⚫ | |||
return fixed_template; |
return fixed_template; |
||
}); |
}); |
||
Line 101: | Line 111: | ||
ArticleText = unhide (ArticleText, true); // unhide all that is hidden |
ArticleText = unhide (ArticleText, true); // unhide all that is hidden |
||
if ((0 == deleted_count) && (0 == fixed_count)) |
if ((0 == deleted_count) && (0 == fixed_count)) |
||
{ |
{ |
||
Line 111: | Line 121: | ||
Summary = Summary + @" replaced (" + fixed_count + @"×) / removed (" + deleted_count + @"×) deprecated |dead-url= and |deadurl= with |url-status=;"; |
Summary = Summary + @" replaced (" + fixed_count + @"×) / removed (" + deleted_count + @"×) deprecated |dead-url= and |deadurl= with |url-status=;"; |
||
return ArticleText; |
return ArticleText; |
||
} |
} |
||
Line 125: | Line 135: | ||
private string hide (string ArticleText, string dont_hide, bool wikilinks) |
private string hide (string ArticleText, string dont_hide, bool wikilinks) |
||
{ |
{ |
||
string pattern = @"\{\{(?!\s*" + dont_hide + @") |
string pattern = @"\{\{(?!\s*" + dont_hide + @")[^\{\}]*\}\}"; |
||
if (Regex.Match (ArticleText, pattern).Success) |
|||
{ |
|||
⚫ | |||
ArticleText = Regex.Replace(ArticleText, pattern, |
|||
delegate(Match match) |
|||
{ |
|||
string fixed_template; // a hidden template is assembled here |
|||
string raw_template = match.Groups[0].Value; // the whole template |
|||
pattern = @"\{\{"; // hide the opening {{ |
|||
fixed_template = Regex.Replace (raw_template, pattern, "__0P3N__"); |
|||
pattern = @"\}\}"; // hide the closing }} |
|||
fixed_template = Regex.Replace (fixed_template, pattern, "__CL0S3__"); |
|||
pattern = @"\|"; // and hide the pipes |
|||
fixed_template = Regex.Replace (fixed_template, pattern, "__P1P3__"); |
|||
return fixed_template; |
|||
}); |
|||
} |
|||
pattern = @"([^\{])\{([^\{])"; // single opening curly brace |
|||
⚫ | |||
pattern = @"([^\}])\}([^\}])"; // single closing curly brace |
|||
ArticleText = Regex.Replace(ArticleText, pattern, "$1__CCU!21Y__$2"); |
|||
if (wikilinks) |
if (wikilinks) |
||
{ |
{ |
||
Line 154: | Line 189: | ||
ArticleText = Regex.Replace(ArticleText, @"__P1P3__", "|"); // UNHIDE: replace __P1P3__ with | |
ArticleText = Regex.Replace(ArticleText, @"__P1P3__", "|"); // UNHIDE: replace __P1P3__ with | |
||
} |
} |
||
ArticleText = Regex.Replace(ArticleText, @"__0CU!21Y__", "{"); // UNHIDE: replace __0CU!21Y__ with { |
|||
ArticleText = Regex.Replace(ArticleText, @"__CCU!21Y__", "}"); // UNHIDE: replace __CCU!21Y__ with } |
|||
ArticleText = Regex.Replace(ArticleText, @"__0P3N__", "{{"); // UNHIDE: replace __0P3N__ with {{ |
ArticleText = Regex.Replace(ArticleText, @"__0P3N__", "{{"); // UNHIDE: replace __0P3N__ with {{ |
||
Line 164: | Line 202: | ||
//---------------------------< E M P T Y _ P A R A M _ R E M O V E >------------------------------------------ |
//---------------------------< E M P T Y _ P A R A M _ R E M O V E >------------------------------------------ |
||
// |
// |
||
// This function removes all empty named parameters from a template |
// This function removes all empty named parameters from a template, attempting to leave what remains the same form. |
||
// |
|||
// this is a multi-step process that attempts to handle most of the vagaries of how templates are written in |
|||
// wikitext. In general there are three basic 'styles': horizontal – all parameters written on a single |
|||
// line of text, vertical – all parameter written singly one-to-a-line, and a mix of the two – multiple lines |
|||
// where each has one or more parameters. |
|||
// |
|||
// 1. where the parameter name & '=' are on one line and the value on a following line, put the value on the same line as the '=' |
|||
// 2. for mixed, when empties are followed by new line; remove the empty but leave the newline |
|||
// 3. for any, empties are followed by pipe closing }; remove the empty but leave the | or } |
|||
// 4. the preceding steps can leave blank lines; remove the blank lines |
|||
// |
// |
||
private string empty_param_remove (string template) |
private string empty_param_remove (string template) |
||
{ |
{ |
||
string pattern = @"\|[^=]+=\ |
string pattern = @"(\|[^=]+=[ \t]*)[\r\n]+(?!\s*[\|\}])"; // parameter name & '=' on one line, value on a following line |
||
while (Regex.Match(template, pattern).Success) // put them on the same line |
|||
template = Regex.Replace(template, pattern, "$1"); |
|||
pattern = @"\|[^=]+=[ \t]*([\r\n]+)"; // empty followed by new line |
|||
while (Regex.Match(template, pattern).Success) |
|||
template = Regex.Replace(template, pattern, "$1"); |
|||
pattern = @"\|[^=]+=\s*([\|\}])"; // empty followed by pipe or at end of template |
|||
while (Regex.Match(template, pattern).Success) |
|||
template = Regex.Replace(template, pattern, "$1"); |
|||
pattern = @"([\r\n]+)[ \t]*[\r\n]+"; // close up multiple new lines |
|||
while (Regex.Match(template, pattern).Success) |
while (Regex.Match(template, pattern).Success) |
||
template = Regex.Replace(template, pattern, "$1"); |
template = Regex.Replace(template, pattern, "$1"); |
Revision as of 21:35, 4 June 2019
The next version of the Module:Citation/CS1 suite will deprecate |dead-url=
and |deadurl=
because these two parameters violate the nominal standard that says that parameters ending in -url
hold a url as a value.
Wikitext | {{cite book
|
---|---|
Live | Title. Archived from the original on 2015-05-19. {{cite book}} : Unknown parameter |dead-url= ignored (|url-status= suggested) (help)
|
Sandbox | Title. Archived from the original on 2015-05-19. {{cite book}} : Unknown parameter |dead-url= ignored (|url-status= suggested) (help)
|
Wikitext | {{cite book
|
---|---|
Live | Title. Archived from the original on 2015-05-19. |
Sandbox | Title. Archived from the original on 2015-05-19. |
The purpose of task 15 is to replace various combinations of |dead-url=
and |deadurl=
and their associated keywords with |url-status=
and its appropriate keywords.
description
|dead-url=
and |deadurl=
accept a limited set of keywords that control the rendering of cs1|2 citation templates that have archive urls. The keywords that concern this task are:
yes
,y
,true
,no
The remaining keywords retain their meaning and purpose:
unfit
,usurped
,bot: unknown
Because |url-status=no
and |url-status=yes
(and the other 'positive' keywords) are nonsensical, live
(replacing no
) and dead
(replacing yes
...) have been assigned to this parameter.
Task 15 searches for templates that use either of the |dead-url=
and |deadurl=
parameters (with or without assigned keyword) and then:
- renames the parameter to url-status
- replaces the assigned keyword no with live, and replaces the assigned keywords yes, y, and true with dead; keywords unfit, usurped, bot: unknown are retained
- deletes all empty parameters (will delete an empty |url-status= parameter, except that an empty |url-status= is retained, unfilled, when |archive-url= has a value)
|dead-url=
and |deadurl=
without an assigned keyword are intentionally included in this process so that the deprecated, and ultimately unsupported, parameters don't linger in article space.
Still to be decided: dead
is the default value when |url-status=
is omitted or empty. As such, |url-status=dead
is redundant so it may be appropriate to simply delete |dead-url=
and |deadurl=
when they hold yes
, y
, or true
.
edit summaries
Task 15 writes an edit summary message that tallies the number of replacements and the number of deletions. The message has the form:
- replaced (n×) / removed (n×) deprecated |dead-url= and |deadurl= with |url-status=;
the edit summary has a link to this page.
ancillary tasks
Deletes all empty parameters from templates that are repaired.
This task does not do awb general fixes.
script
// remove, replace |deadurl= and |dead-url= with |url-status=
// when assigned value is 'yes', 'y', 'true', replace assigned value with 'dead'
// when assigned value is 'no', replace assigned value with 'live'
// when empty, and when |archive-url= is empty, delete
// when empty, and when |archive-url= has a value, retain but do not fill
// use Wikisearch: insource:/\| *dead\-?url *= *[^\|\}]/
// Entry point for the task: for every cs1|2 template in ArticleText that carries |dead-url= or |deadurl=,
// rename the parameter to |url-status=, translate the keywords ('yes', 'y', 'true' -> 'dead'; 'no' -> 'live'),
// and then remove all empty parameters from that template.  An empty |url-status= is kept (unfilled) when
// |archive-url= has a value.
//
// ArticleText     the wikitext to process
// ArticleTitle    not used by this task
// wikiNamespace   not used by this task
// Summary         (out) edit summary with replacement / removal tallies; empty string when nothing was changed
// Skip            (out) true when no template was fixed or had its parameter removed
//
// returns the (possibly modified) wikitext
//
public string ProcessArticle(string ArticleText, string ArticleTitle, int wikiNamespace, out string Summary, out bool Skip)
{
    Skip = false;                                                       // for development, never skip; for the bot set this true then when fixes are made, set it false
    string IS_CS1 = @"(?:[Cc]ite\s*(?=(?:AV media(?: notes)?)|[Aa][Vv] media|[Aa][Vv] media notes|article|ar[Xx]iv|biorxiv|book|conference|document|encyclopa?edia|episode|interview|journal|magazine|mailing ?list|manual|newspaper|(?:news(?!group|paper))|paper|podcast|press release|report|serial|sign|speech|techreport|thesis|video|web)|[Cc]itation|[Cc]ite(?=\s*\|))";
    int fixed_count = 0;
    int deleted_count = 0;

    //---------------------------< S T A R T >--------------------------------------------------------------------

    ArticleText = hide (ArticleText, IS_CS1, true);                     // hide all templates that aren't cs1 templates & hide wikilinks

    //---------------------------< R E N A M E   D E A D U R L >--------------------------------------------------

    // cs1|2 template has one of |dead-url= or |deadurl= with or without a value
    string template_pattern = @"\{\{\s*" + IS_CS1 + @"[^}]*\|\s*dead\-?url[^\}]+\}\}";

    ArticleText = Regex.Replace(ArticleText, template_pattern,
        delegate(Match match)
        {
            string raw_template = match.Groups[0].Value;                // the whole citation template

            // only touch 'dead-url' / 'deadurl' when it is a parameter NAME (pipe before, '=' after);
            // the same text inside a parameter value (a url, a title) must not be rewritten
            string rename_pattern = @"(\|\s*)dead\-?url(\s*=)";
            if (!Regex.IsMatch (raw_template, rename_pattern))
                return raw_template;                                    // nothing we can fix; leave the template alone and do not count it

            string fixed_template = Regex.Replace (raw_template, rename_pattern, "$1url-status$2");     // replace parameter name

            // \b keeps the keyword match from firing on the leading letters of some other value ('none', 'yesterday')
            fixed_template = Regex.Replace (fixed_template, @"(\|\s*url\-status\s*=\s*)(?:yes|y|true)\b", "$1dead");    // replace 'yes', 'y', 'true' with 'dead'
            fixed_template = Regex.Replace (fixed_template, @"(\|\s*url\-status\s*=\s*)no\b", "$1live");                // replace 'no' with 'live'

            if (Regex.IsMatch (fixed_template, @"\|\s*archive\-?url\s*=\s*[/\w]"))      // if |archive-url= is present and has a value
            {
                // if |url-status= is empty add the 'empty secret word' so that empty_param_remove() does not delete it
                fixed_template = Regex.Replace (fixed_template, @"(\|\s*url\-status\s*=)(\s*[\|\}])", "$1__3MP7Y__$2");
            }

            fixed_template = empty_param_remove (fixed_template);       // remove all empty parameters from this template
            fixed_template = Regex.Replace (fixed_template, @"__3MP7Y__", "");          // remove empty secret word

            if (Regex.IsMatch (fixed_template, @"\|\s*url\-status\s*="))
                fixed_count++;                                          // parameter survived: it was replaced
            else
                deleted_count++;                                        // parameter is gone: it was empty and removed

            return fixed_template;
        });

    //---------------------------< F I N I S H >------------------------------------------------------------------

    ArticleText = unhide (ArticleText, true);                           // unhide all that is hidden

    if ((0 == deleted_count) && (0 == fixed_count))
    {
        Summary = @"";                                                  // nothing done: no summary, skip the article
        Skip = true;
    }
    else
    {
        Summary = "[[User:Monkbot/task 15: remove replace deprecated dead-url params|Task 15]] (developmental testing):";
        Summary = Summary + @" replaced (" + fixed_count + @"×) / removed (" + deleted_count + @"×) deprecated |dead-url= and |deadurl= with |url-status=;";
    }

    return ArticleText;
}
//===========================<< S U P P O R T >>==============================================================
//---------------------------< H I D E >----------------------------------------------------------------------
//
// HIDE TEMPLATES: find templates that are not <dont_hide>; replace the opening {{ with __0P3N__ and the closing }} with __CL0S3__
//
// Hides wiki markup that would otherwise confuse the template regexes.
//
// ArticleText   the wikitext to process
// dont_hide     regex fragment; a template whose name (after optional whitespace) matches it is left untouched
// wikilinks     when true, [[...]] wikilinks are hidden as well
//
// Every {{...}} that contains no other braces and is not <dont_hide> has its {{ replaced with __0P3N__, its }}
// with __CL0S3__ and its pipes with __P1P3__.  Single (unpaired) { and } are then replaced with __0CU!21Y__ and
// __CCU!21Y__.  Templates are hidden in a single pass, so a template that wraps another template is not itself
// hidden.  Returns the text with the hide strings in place; unhide() reverses the substitutions.
//
private string hide (string ArticleText, string dont_hide, bool wikilinks)
{
    string template_pattern = @"\{\{(?!\s*" + dont_hide + @")[^\{\}]*\}\}";

    ArticleText = Regex.Replace(ArticleText, template_pattern,
        delegate(Match match)
        {
            string hidden_template = match.Value;                       // the whole template; a hidden template is assembled here

            hidden_template = hidden_template.Replace ("{{", "__0P3N__");       // hide the opening {{
            hidden_template = hidden_template.Replace ("}}", "__CL0S3__");      // hide the closing }}
            hidden_template = hidden_template.Replace ("|", "__P1P3__");        // and hide the pipes
            return hidden_template;
        });

    // Lookarounds rather than consuming the neighbouring characters: a consumed neighbour cannot take part in
    // the next match, which left the second brace in 'a{b{c' visible, and a brace at the very start or end of
    // the text has no neighbour to consume so it was never hidden.
    ArticleText = Regex.Replace(ArticleText, @"(?<!\{)\{(?!\{)", "__0CU!21Y__");        // single opening curly brace
    ArticleText = Regex.Replace(ArticleText, @"(?<!\})\}(?!\})", "__CCU!21Y__");        // single closing curly brace

    if (wikilinks)
    {
        // HIDE complex wikilinks: [[article title|label]] to __WL1NK_O__article title__P1P3__label__WL1NK_C__
        // [[File: and [[Image: are excluded by the lookahead because wikilinks inside them can be confusing
        ArticleText = Regex.Replace(ArticleText, @"\[\[(?![Ff]ile|[Ii]mage)([^\|\]]+)\|([^\]]+)\]\]", "__WL1NK_O__$1__P1P3__$2__WL1NK_C__");

        // HIDE simple wikilinks: [[article title]] to __WL1NK_O__article title__WL1NK_C__
        ArticleText = Regex.Replace(ArticleText, @"\[\[([^\]]+)\]\]", "__WL1NK_O__$1__WL1NK_C__");
    }

    return ArticleText;
}
//---------------------------< U N H I D E >------------------------------------------------------------------
//
// UNHIDE TEMPLATES: find templates that are hidden; replace the 'hide' strings with the appropriate wiki markup
//
// Reverses the substitutions made by hide(): replaces the 'hide' strings with the wiki markup they stand for.
//
// ArticleText   text that contains hide strings
// wikilinks     when true, the wikilink markers __WL1NK_O__ / __WL1NK_C__ are restored to [[ and ]]
//
// Returns the text with the markup restored.
//
private string unhide (string ArticleText, bool wikilinks)
{
    if (wikilinks)
    {
        ArticleText = ArticleText.Replace ("__WL1NK_O__", "[[");        // UNHIDE: replace __WL1NK_O__ with [[
        ArticleText = ArticleText.Replace ("__WL1NK_C__", "]]");        // UNHIDE: replace __WL1NK_C__ with ]]
    }

    // hide() replaces the pipes of hidden templates with __P1P3__ whether or not wikilinks are hidden, so the
    // pipes must be restored unconditionally; otherwise hide(..., false) / unhide(..., false) leaves __P1P3__
    // in the article text
    ArticleText = ArticleText.Replace ("__P1P3__", "|");                // UNHIDE: replace __P1P3__ with |

    ArticleText = ArticleText.Replace ("__0CU!21Y__", "{");             // UNHIDE: replace __0CU!21Y__ with {
    ArticleText = ArticleText.Replace ("__CCU!21Y__", "}");             // UNHIDE: replace __CCU!21Y__ with }
    ArticleText = ArticleText.Replace ("__0P3N__", "{{");               // UNHIDE: replace __0P3N__ with {{
    ArticleText = ArticleText.Replace ("__CL0S3__", "}}");              // UNHIDE: replace __CL0S3__ with }}

    return ArticleText;
}
//---------------------------< E M P T Y _ P A R A M _ R E M O V E >------------------------------------------
//
// This function removes all empty named parameters from a template, attempting to leave what remains the same form.
//
// this is a multi-step process that attempts to handle most of the vagaries of how templates are written in
// wikitext. In general there are three basic 'styles': horizontal – all parameters written on a single
// line of text, vertical – all parameter written singly one-to-a-line, and a mix of the two – multiple lines
// where each has one or more parameters.
//
// 1. where the parameter name & '=' are on one line and the value on a following line, put the value on the same line as the '='
// 2. for mixed, when empties are followed by new line; remove the empty but leave the newline
// 3. for any, when empties are followed by a pipe or the closing }; remove the empty but leave the | or }
// 4. the preceding steps can leave blank lines; remove the blank lines
//
// Removes every empty named parameter (|name= with no value) from <template> and returns the result.
//
// The four patterns are applied in order, each one repeatedly until it no longer matches, because a single
// Regex.Replace pass cannot see a match that overlaps the previous one (adjacent empties share a pipe).
// In every pattern the parameter name is [^=\|]+ so that a match can never start at one pipe and run across
// the next: without that restriction an unnamed parameter in front of an empty one ('|junk|title=|') was
// deleted along with the empty parameter.
//
private string empty_param_remove (string template)
{
    string[] cleanup_patterns =
    {
        @"(\|[^=\|]+=[ \t]*)[\r\n]+(?!\s*[\|\}])",      // parameter name & '=' on one line, value on a following line; put them on the same line
        @"\|[^=\|]+=[ \t]*([\r\n]+)",                   // empty followed by new line; remove the empty, keep the new line
        @"\|[^=\|]+=\s*([\|\}])",                       // empty followed by pipe or at end of template; keep the | or }
        @"([\r\n]+)[ \t]*[\r\n]+"                       // close up multiple new lines left by the previous steps
    };

    foreach (string cleanup_pattern in cleanup_patterns)
    {
        while (Regex.IsMatch (template, cleanup_pattern))               // every replacement shortens the text so this terminates
            template = Regex.Replace (template, cleanup_pattern, "$1");
    }

    return template;
}