Wikipedia:Orphaned Articles/How to update
From Wikipedia, the free encyclopedia
Besides updating the Wikipedia:Orphaned Articles list manually, you can use the automated routine described on this page.
# Use at your own risk; no warranty implied or given.
# Please debug/improve the query.
#
# Builds the list of orphaned articles: pages in the article namespace
# (cur_namespace = 0) that are not redirects and that no qualifying link
# points to. The result is written as wiki list markup ("#[[Title]]") to a
# text file.
# Reads the `cur` and `links` tables; (re)creates the working tables
# temp_deorphan, temp_disambigid and temp_orphans, dropping any old copies.

DROP TABLE IF EXISTS temp_deorphan;

# Defined like the links table, dropping some keys.
# ENGINE= is used instead of the older TYPE= spelling, which current MySQL
# releases no longer accept.
CREATE TABLE `temp_deorphan` (
    `l_from` int(8) unsigned NOT NULL default '0',
    `l_to` int(8) unsigned NOT NULL default '0',
    KEY `l_from` (`l_from`),
    KEY `l_to` (`l_to`)
) ENGINE=MyISAM;

# This is slow: links has over 6 million entries.
# NOTE(review): the LIMIT is a hard cap; any rows of links beyond
# 10,000,000 would be silently left out. Raise it if links has grown.
INSERT INTO temp_deorphan (l_from, l_to)
SELECT l_from, l_to
FROM links
LIMIT 10000000;

# Remove links that don't de-orphan pages. This version handles two cases
# in one statement:
#   - links from namespaces other than the article namespace
#   - links from redirects
# 1.1 million rows
DELETE FROM temp_deorphan
USING temp_deorphan
INNER JOIN cur
    ON temp_deorphan.l_from = cur.cur_id
WHERE cur.cur_namespace <> 0
   OR cur.cur_is_redirect = 1;

# Find the cur_id of the disambiguation marker (Template:Disambig).
DROP TABLE IF EXISTS temp_disambigid;

CREATE TABLE temp_disambigid
SELECT cur_id AS d_id
FROM cur
WHERE cur_title = 'Disambig'
  AND cur_namespace = 10;  # 10 = Template namespace

# Links from disambiguation pages are not included, i.e. a page linked only
# from a disambiguation page is an orphan. A page counts as a disambiguation
# page here when it has a link to the marker found above.
# The delete target is deliberately left unaliased: MySQL 4.1 and later
# reject a multi-table DELETE that names the real table after DELETE FROM
# while the join list declares an alias for it.
# approx. 100000 rows
DELETE FROM temp_deorphan
USING temp_deorphan
INNER JOIN links AS l
    ON l.l_from = temp_deorphan.l_from
INNER JOIN temp_disambigid AS id
    ON id.d_id = l.l_to;

# Keep the disambiguation pages themselves from showing up as orphans by
# adding one incoming row per disambiguation page. The source id 999999 is
# only a filler value; the orphan query below looks at l_to alone.
# NOTE(review): the LIMIT caps this at 20000 disambiguation pages; beyond
# that, the remainder would be reported as orphans.
INSERT INTO temp_deorphan (l_from, l_to)
SELECT DISTINCT 999999, l.l_from
FROM links AS l
INNER JOIN temp_disambigid AS id
    ON id.d_id = l.l_to
LIMIT 20000;

# Add a working table with the orphans: non-redirect pages in the article
# namespace that have no row left in temp_deorphan pointing at them.
# NOTE(review): LIMIT without ORDER BY keeps an arbitrary 20000 rows if
# there are ever more orphans than that.
# 12000 rows
DROP TABLE IF EXISTS temp_orphans;

CREATE TABLE temp_orphans
SELECT cur.cur_id
FROM cur
LEFT JOIN temp_deorphan
    ON cur.cur_id = temp_deorphan.l_to
WHERE temp_deorphan.l_to IS NULL
  AND cur.cur_namespace = 0
  AND cur.cur_is_redirect = 0
LIMIT 20000;

# Output the list (filtering some 600 rambot orphans).
# REPLACE( is written without a space before the parenthesis: with a space,
# servers that require the parenthesis to follow a function name directly
# parse REPLACE as the statement keyword instead.
# The backslash in the path is doubled because a single backslash inside a
# MySQL string literal starts an escape sequence and would be dropped.
SELECT CONCAT('#[[', REPLACE(cur.cur_title, '_', ' '), ']]') AS orphanslist
INTO OUTFILE 'wp:\\wp_orphanend_articles.txt'  # set this to the path you need
FROM cur
INNER JOIN temp_orphans
    ON temp_orphans.cur_id = cur.cur_id
WHERE NOT (
    cur.cur_title LIKE '%(CDP)%'
    OR cur.cur_title LIKE '%(town)%'
    OR cur.cur_title LIKE '%(city)%'
    OR cur.cur_title LIKE '%(village)%'
    OR cur.cur_title LIKE '%Township%'
)
ORDER BY LOWER(cur.cur_title)
LIMIT 20000;
Note: these queries are slow.