Wikipedia:Orphaned Articles/How to update

From Wikipedia, the free encyclopedia

As an alternative to updating the Wikipedia:Orphaned Articles list manually, you can use the automated SQL routine described on this page.

#Use at your own risk, no warranty implied or given
#Please debug/improve the query.

DROP TABLE IF EXISTS temp_deorphan;

# Working copy of the links table: the same two columns, each indexed,
# with some of the keys of `links` dropped.
# ENGINE= is used instead of the older TYPE= spelling, which was
# deprecated and later removed from MySQL (5.5 and up reject it).
CREATE TABLE `temp_deorphan` (
  `l_from` INT(8) UNSIGNED NOT NULL DEFAULT '0',
  `l_to` INT(8) UNSIGNED NOT NULL DEFAULT '0',
  KEY `l_from` (`l_from`),
  KEY `l_to` (`l_to`)
) ENGINE=MyISAM;

# Copy the link pairs from links into the working table.
# This is slow: links has over 6 million entries.
# NOTE(review): the LIMIT is only a cap; should links grow past it, rows
# are left out silently, and without an ORDER BY which ones is unspecified.
INSERT INTO temp_deorphan (l_from, l_to)
SELECT
    links.l_from,
    links.l_to
FROM links
LIMIT 10000000;

# Remove links that don't de-orphan pages.
# Two kinds of linking page are handled in a single pass:
#   - pages in a namespace other than the article namespace (0)
#   - redirects
# About 1.1 million rows.
DELETE
FROM temp_deorphan
USING temp_deorphan
INNER JOIN cur
    ON temp_deorphan.l_from = cur.cur_id
WHERE cur.cur_is_redirect = 1
   OR cur.cur_namespace <> 0;

# Look up the cur_id of the disambiguation marker (Template:Disambig)
# and keep it in a one-column helper table.
DROP TABLE IF EXISTS temp_disambigid;

CREATE TABLE temp_disambigid
SELECT cur.cur_id AS d_id
FROM cur
WHERE cur.cur_namespace = 10      # 10 = Template namespace
  AND cur.cur_title = 'Disambig';

# Links from disambiguation pages are not counted,
# i.e. a page linked only from a disambiguation page is still an orphan.
# A disambiguation page is recognised here by its own link to the
# Template:Disambig id held in temp_disambigid.
# temp_deorphan is deliberately left un-aliased: MySQL 4.1 and later
# require the DELETE target to be named the same way it is declared in
# USING, so "DELETE FROM temp_deorphan USING ... temp_deorphan AS d"
# is rejected there.
# Approx. 100000 rows.
DELETE
FROM temp_deorphan
USING temp_deorphan
INNER JOIN links AS l
    ON l.l_from = temp_deorphan.l_from
INNER JOIN temp_disambigid AS dis
    ON dis.d_id = l.l_to;

# Keep disambiguation pages themselves off the orphan list: every page
# that links to the Template:Disambig id gets one synthetic incoming
# link whose source id is the placeholder value 999999.
INSERT INTO temp_deorphan (l_from, l_to)
SELECT DISTINCT
    999999,
    links.l_from
FROM links
INNER JOIN temp_disambigid
    ON temp_disambigid.d_id = links.l_to
LIMIT 20000;

# Build a temporary table holding the orphans: non-redirect pages in
# namespace 0 that have no row in temp_deorphan pointing at them.
# About 12000 rows.
DROP TABLE IF EXISTS temp_orphans;
CREATE TABLE temp_orphans
SELECT cur.cur_id
FROM cur
LEFT JOIN temp_deorphan
    ON temp_deorphan.l_to = cur.cur_id
WHERE cur.cur_is_redirect = 0
  AND cur.cur_namespace = 0
  AND temp_deorphan.l_to IS NULL   # anti-join: no matching incoming link
LIMIT 20000;


# Write the orphan list to a file as wiki list items ("#[[Title]]"),
# sorted case-insensitively by title and filtering out some 600 rambot
# orphans by title pattern.
# Underscores in cur_title are replaced with spaces in the output.
# The backslash in the path is doubled because MySQL drops a lone
# backslash in front of an ordinary character ('\w' is read as 'w'),
# which would silently remove the path separator.
SELECT CONCAT('#[[', REPLACE(cur.cur_title, '_', ' '), ']]') AS orphanslist
INTO OUTFILE 'wp:\\wp_orphanend_articles.txt'  #set this to path you need
FROM cur
INNER JOIN temp_orphans
    ON temp_orphans.cur_id = cur.cur_id
WHERE NOT (cur.cur_title LIKE '%(CDP)%'
        OR cur.cur_title LIKE '%(town)%'
        OR cur.cur_title LIKE '%(city)%'
        OR cur.cur_title LIKE '%(village)%'
        OR cur.cur_title LIKE '%Township%')
ORDER BY LOWER(cur.cur_title)
LIMIT 20000;

Note: these queries are slow.