#!/usr/bin/env python2.5
# Copyright 2010 bjweeks, MZMcBride
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import datetime
import MySQLdb, MySQLdb.cursors
import re
import wikitools
import settings
# Regex fragments for page titles that must not appear in the report. The list
# is combined into excluded_titles_re (compiled without flags, so matching is
# case-sensitive) and searched against page_title in the result loop.
# NOTE(review): neither entries nor a closing ']' are visible for this list in
# this view -- confirm its intended contents against the full file.
excluded_titles = [
# Regex fragments naming occupations. They are combined into jobs_re
# (case-insensitive, Unicode-aware) and searched against cl_to; a page is only
# reported when at least one fragment is found there.
# Each fragment ends in the character class [,\n]; because these are ordinary
# (non-raw) string literals, \n is a real newline, so the occupation name must
# be followed directly by a comma or a newline to match.
# NOTE(review): the result loop requires page_title to contain '_', which
# suggests database titles use underscores where these fragments use spaces --
# confirm that multi-word fragments can actually match the cl_to text.
# NOTE(review): many entries look like an 's' was appended mechanically to a
# title (e.g. 'Animal trainings', 'Chief of polices', 'Ordinary Seamans');
# verify that such names exist in the data.
# NOTE(review): the closing ']' of this list is not visible in this view.
jobs = [
'Aerospace engineers[,\n]',
'Air traffic controllers[,\n]',
'Animal trainings[,\n]',
'Animal trainers[,\n]',
'Art dealers[,\n]',
'Athletic trainers[,\n]',
'Bank tellers[,\n]',
'Beauty therapists[,\n]',
'Brain surgeons[,\n]',
'Cab drivers[,\n]',
'Car designers[,\n]',
'Chess players[,\n]',
'Chief compliance officers[,\n]',
'Chief executive officers[,\n]',
'Chief information officers[,\n]',
'Chief financial officers[,\n]',
'Chief technology officers[,\n]',
'Chief privacy officers[,\n]',
'Chief of polices[,\n]',
'Chimney sweeps[,\n]',
'Civil servants[,\n]',
'Civil engineers[,\n]',
'Coast guards[,\n]',
'Company secretaries[,\n]',
'Computer programmers[,\n]',
'Construction engineers[,\n]',
'Construction workers[,\n]',
'General contractors[,\n]',
'Corrections officers[,\n]',
'Costume designers[,\n]',
'customer service advisors[,\n]',
'Customer service representatives[,\n]',
'Customs officers[,\n]',
'Disc jockeys[,\n]',
'Underwater divings[,\n]',
'Dog walkers[,\n]',
'Fashion designers[,\n]',
'Film directors[,\n]',
'Film producers[,\n]',
'Financial advisers[,\n]',
'Fire marshals[,\n]',
'Fire Safety Officers[,\n]',
'First Mates[,\n]',
'Flight attendants[,\n]',
'Flight instructors[,\n]',
'Food critics[,\n]',
'Fortune tellers[,\n]',
'Funeral directors[,\n]',
'Game designers[,\n]',
'Game wardens[,\n]',
'Government agents[,\n]',
'Graphic designers[,\n]',
'Hotel managers[,\n]',
'Image consultants[,\n]',
'Industrial engineers[,\n]',
'Information Technologists[,\n]',
'Interior designers[,\n]',
'Investment bankers[,\n]',
'Investment brokers[,\n]',
'Karate masters[,\n]',
'kindergarten teachers[,\n]',
# NOTE(review): 'Loan officers' is listed again further down; the repeat is
# redundant in an alternation.
'Loan officers[,\n]',
'Law enforcement agents[,\n]',
'Leather workers[,\n]',
'Level designers[,\n]',
'Lighthouse keepers[,\n]',
'Lighting technicians[,\n]',
'Loan officers[,\n]',
'Mailman or Mail carriers[,\n]',
'Make-up artists[,\n]',
'Management consultants[,\n]',
'Marine biologists[,\n]',
'Market gardeners[,\n]',
'Martial artists[,\n]',
'Master of business administrations[,\n]',
'Massage therapists[,\n]',
'Mechanical Engineers[,\n]',
'Medical billings[,\n]',
'Medical billers[,\n]',
'Medical Laboratory Scientists[,\n]',
'Medical Transcriptionists[,\n]',
'Bicycle messengers[,\n]',
'Mortgage brokers[,\n]',
'Music educators[,\n]',
'Night auditors[,\n]',
'Notary publics[,\n]',
'Occupational therapists[,\n]',
'Ordinary Seamans[,\n]',
'Park rangers[,\n]',
'Parole Officers[,\n]',
'Patent attorneys[,\n]',
'Patent examiners[,\n]',
'Personal Trainers[,\n]',
'Physical Therapists[,\n]',
'Physician Assistants[,\n]',
'Piano tuners[,\n]',
'Police inspectors[,\n]',
# NOTE(review): 'Press officers' is listed again a few entries below.
'Press officers[,\n]',
'Prison officers[,\n]',
'Private detectives[,\n]',
'Probation Officers[,\n]',
'Product designers[,\n]',
'Professional dominants[,\n]',
'Project Managers[,\n]',
'Press officers[,\n]',
'Public Relations Officers[,\n]',
'Public speakers[,\n]',
'Porn stars[,\n]',
'Queen consorts[,\n]',
'Queen regnants[,\n]',
'Real estate brokers[,\n]',
'Real estate investors[,\n]',
'Real estate developers[,\n]',
'Record producers[,\n]',
'Refuse collectors[,\n]',
'Registered Nurses[,\n]',
'Respiratory Therapists[,\n]',
'Rubbish Collectors[,\n]',
'Sex Slaves[,\n]',
'Sanitation workers[,\n]',
'School superintendents[,\n]',
'Second Mates[,\n]',
'Secret service agents[,\n]',
'Secretary generals[,\n]',
'Security guards[,\n]',
'Search Engine Optimizations[,\n]',
'Search engine optimizers[,\n]',
'Sheriff officers[,\n]',
'Shop assistants[,\n]',
'Social workers[,\n]',
'Software engineerings[,\n]',
'Software engineers[,\n]',
'Soil sciences[,\n]',
'Soil scientists[,\n]',
'Sound Engineers[,\n]',
'Special agents[,\n]',
'Speech therapists[,\n]',
'Street artists[,\n]',
'Street musicians[,\n]',
'Street sweepers[,\n]',
'Street vendors[,\n]',
# NOTE(review): double 's' -- probably meant 'Structural engineers'; as
# written this requires the literal text 'engineerss'.
'Structural engineerss[,\n]',
'Stunt doubles[,\n]',
'Stunt performers[,\n]',
'Switchboard operators[,\n]',
'System administrators[,\n]',
'Systems analysts[,\n]',
'Tax collectors[,\n]',
'Tax lawyers[,\n]',
'Taxicab drivers[,\n]',
'Tea ladies[,\n]',
'Technical writers[,\n]',
'Telegraph operators[,\n]',
'Telephone operators[,\n]',
'Tennis players[,\n]',
'Quality controls[,\n]',
'Test developers[,\n]',
'Test pilots[,\n]',
'Theatre directors[,\n]',
'Tour Guides[,\n]',
'Trademark attorneys[,\n]',
'Transit planners[,\n]',
'Transport Planners[,\n]',
'Truck drivers[,\n]',
'Undercover agents[,\n]',
'Urban planners[,\n]',
'Underwear models[,\n]',
'Video editors[,\n]',
'Video game developers[,\n]',
# NOTE(review): double 's' and a 'List of' prefix -- looks like a list-article
# title with 's' appended rather than an occupation name; verify.
'List of violinistss[,\n]',
'Voice Actors[,\n]',
'Waiting staffs[,\n]',
'weather forecastings[,\n]',
'Web designers[,\n]',
'Web developers[,\n]',
'Wedding planners[,\n]',
'Wet nurses[,\n]',
'Wood cutters[,\n]',
'X-ray Operators[,\n]',
'Yinder Hos[,\n]',
'zen masters[,\n]',
'zoo veternarians[,\n]',
# Regex fragments that disqualify a page when found in cl_to. The list is
# combined into excluded_categories_re (case-insensitive, Unicode-aware).
# NOTE(review): neither entries nor a closing ']' are visible for this list in
# this view -- confirm its intended contents against the full file.
excluded_categories = [
# Regex fragments that disqualify a page when found in tl_title. The list is
# combined into excluded_templates_re (case-insensitive, Unicode-aware).
# NOTE(review): neither entries nor a closing ']' are visible for this list in
# this view -- confirm its intended contents against the full file.
excluded_templates = [
def _compile_alternation(patterns, flags=0):
    """Compile a list of regex fragments into one alternation pattern.

    patterns -- iterable of regex fragments; each is passed through str().
    flags    -- flags forwarded to re.compile().

    Returns a compiled pattern of the form (a|b|...). When `patterns` is
    empty the result never matches anything: joining nothing would give
    '()', which matches every string and would silently invert every
    search() test built on the pattern.
    """
    fragments = [str(p) for p in patterns]
    if not fragments:
        # (?!) is an empty negative lookahead, i.e. a pattern that always fails.
        fragments = ['(?!)']
    return re.compile(r'(%s)' % '|'.join(fragments), flags)


# One combined matcher per pattern list. Title matching is case-sensitive;
# the other three ignore case and use Unicode character properties.
excluded_titles_re = _compile_alternation(excluded_titles)
jobs_re = _compile_alternation(jobs, re.I|re.U)
excluded_categories_re = _compile_alternation(excluded_categories, re.I|re.U)
excluded_templates_re = _compile_alternation(excluded_templates, re.I|re.U)
# Finds ASCII capital letters; used to count capitals in a page title.
capital_letters_re = re.compile(r'[A-Z]')
# Wiki page that receives the report: the configured root page followed by a
# fixed report name.
report_title = settings.rootpage + 'Potential biographies of living people (4)'
# Wikitext for the report page. It is filled in with the % operator when the
# report text is built, which is why the literal percent sign in the table
# style is doubled ("100%%"). The %s inside <onlyinclude> is the first
# placeholder and receives the "data as of" timestamp. The backslash at the end
# of the first sentence line joins it to the following line inside the string.
# NOTE(review): the remainder of the template (a second placeholder for the
# table rows, the end of the table) and the closing quotes are not visible in
# this view.
report_template = u'''
Articles that potentially need to be in [[:Category:Living people]] (limited to the first 2000 \
entries). List generated mostly using magic; data as of <onlyinclude>%s</onlyinclude>.
{| class="wikitable sortable plainlinks" style="width:100%%; margin:auto;"
|- style="white-space:nowrap;"
! No.
! Biography
# Log in to the wiki API with the account configured in settings.
wiki = wikitools.Wiki(settings.apiurl)
wiki.login(settings.username, settings.password)
# Open the database connection; options not given here are read from
# ~/.my.cnf. SSCursor is MySQLdb's server-side cursor class, so rows are
# fetched from the server as they are read rather than buffered up front.
conn = MySQLdb.connect(host=settings.host, db=settings.dbname, read_default_file='~/.my.cnf', cursorclass=MySQLdb.cursors.SSCursor)
cursor = conn.cursor()
# Raise the GROUP_CONCAT length limit for this session.
# NOTE(review): the SQL text after this statement (from the SLOW_OK marker to
# LIMIT 100000) is the tail of the main query. The call that opens it, its
# SELECT list and its closing are not visible in this view. The result loop
# reads three columns per row (page title, cl_to text, tl_title text);
# presumably the last two are GROUP_CONCAT results -- confirm against the full
# file.
cursor.execute('SET SESSION group_concat_max_len = 1000000;')
/* potenshblps4.py SLOW_OK */
FROM page
LEFT JOIN templatelinks
ON tl_from = page_id
LEFT JOIN categorylinks
ON cl_from = page_id
WHERE page_namespace = 0
AND page_is_redirect = 0
GROUP BY page_id
LIMIT 100000;
# Turn query rows into wikitext table rows.
# i is the 1-based number given to the next table row; output collects the
# row strings, which are joined with newlines when the report text is built.
# NOTE(review): indentation and several control-flow lines of this loop are
# not visible in this view; the notes below mark each gap.
i = 1
output = []
while True:
row = cursor.fetchone()
# Cap check, evaluated after the fetch (the report template says it is limited
# to the first 2000 entries).
# NOTE(review): the body of this test is not visible -- presumably `break`.
if i > 2000:
# No row was returned.
# NOTE(review): the body of this test is not visible -- presumably `break`.
# `row is None` would be the idiomatic spelling.
if row == None:
# Column 0: the page title, decoded from UTF-8.
page_title = u'%s' % unicode(row[0], 'utf-8')
# Column 1: the cl_to text, decoded from UTF-8 when the column is not NULL.
if row[1] is not None:
cl_to = u'%s' % unicode(row[1], 'utf-8')
# Fallback value for a NULL column.
# NOTE(review): presumably guarded by an `else:` that is not visible here.
cl_to = 'NULL'
# Column 2: the tl_title text, decoded from UTF-8 when the column is not NULL.
if row[2] is not None:
tl_title = u'%s' % unicode(row[2], 'utf-8')
# Fallback value for a NULL column.
# NOTE(review): presumably guarded by an `else:` that is not visible here.
tl_title = ''
# A row qualifies only when all of the following hold:
#   - no excluded-category pattern is found in cl_to
#   - no excluded-title pattern is found in page_title
#   - page_title contains at least one underscore
#   - at least one jobs pattern is found in cl_to
#   - page_title contains more than one ASCII capital letter
#   - no excluded-template pattern is found in tl_title
# NOTE(review): the line that closes this condition is not visible.
if (
not excluded_categories_re.search(cl_to) and
not excluded_titles_re.search(page_title) and
page_title.find('_') != -1 and
jobs_re.search(cl_to) and
len(capital_letters_re.findall(page_title)) > 1 and
not excluded_templates_re.search(tl_title)
# One table row: the running number, a wiki link to the page, and the row
# separator.
table_row = u'''| %d
| [[%s]]
|-''' % (i, page_title)
# NOTE(review): no statement adding table_row to output is visible in this
# view, although output is what the report is built from -- confirm against
# the full file.
i += 1
# Ask the database how many seconds separate its current time from the newest
# recentchanges timestamp (the lag, per the variable name), then subtract that
# from the current UTC time to get the moment the data reflects.
cursor = conn.cursor()
cursor.execute('SELECT UNIX_TIMESTAMP() - UNIX_TIMESTAMP(rc_timestamp) FROM recentchanges ORDER BY rc_timestamp DESC LIMIT 1;')
rep_lag = cursor.fetchone()[0]
utc_now = datetime.datetime.utcnow()
data_time = utc_now - datetime.timedelta(seconds=rep_lag)
current_of = data_time.strftime('%H:%M, %d %B %Y (UTC)')

# Fill the template with the timestamp and the collected table rows, encode
# the result as UTF-8 and save it to the report page as a bot edit.
report = wikitools.Page(wiki, report_title)
table_rows = '\n'.join(output)
report_text = (report_template % (current_of, table_rows)).encode('utf-8')
report.edit(report_text, summary=settings.editsumm, bot=1)