Wikipedia:Database reports/Ownerless pages in the user space/Configuration

This report is updated every day.

Source code

edit
/*
Copyright 2008 bjweeks, MZMcBride
Copyright 2022 Kunal Mehta <legoktm@debian.org>

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
use anyhow::Result;
use dbreps2::{linker, str_vec, Frequency, Report};
use mysql_async::prelude::*;
use mysql_async::Conn;

struct FirstRow {
    page_id: u64,
    page_namespace: u32,
    page_title: String,
    page_len: u64,
}

struct SecondRow {
    rev_timestamp: String,
    actor_name: String,
}

pub struct Row {
    page_namespace: u32,
    page_title: String,
    length: u64,
    creator: String,
    creation_date: String,
}

async fn user_exists_globally(ca_conn: &mut Conn, name: &str) -> Result<bool> {
    let row: Option<usize> = ca_conn
        .exec_first(
            r#"
SELECT
1
FROM globaluser
WHERE gu_name = ?
"#,
            (name,),
        )
        .await?;
    Ok(row.is_some())
}

async fn lookup_revision(
    conn: &mut Conn,
    row: &FirstRow,
) -> Result<Option<SecondRow>> {
    Ok(conn
        .exec_map(
            r#"
SELECT
  rev_timestamp,
  actor_name
FROM
  page
  JOIN revision ON page_id = rev_page
  JOIN actor ON rev_actor = actor_id
WHERE
  page_id = ?
ORDER BY
  rev_timestamp ASC
LIMIT
  1;
"#,
            (row.page_id,),
            |(rev_timestamp, actor_name)| SecondRow {
                rev_timestamp,
                actor_name,
            },
        )
        .await?
        .into_iter()
        .next())
}

pub struct Ownerlessuserpages {}

impl Report<Row> for Ownerlessuserpages {
    fn title(&self) -> &'static str {
        "Ownerless pages in the user space"
    }

    fn frequency(&self) -> Frequency {
        Frequency::Daily
    }

    fn query(&self) -> &'static str {
        r"
/* ownerlessuserpages.rs SLOW_OK */
SELECT
  page_id,
  page_namespace,
  page_title,
  page_len
FROM
  page
  LEFT JOIN user ON user_name = REPLACE(SUBSTRING_INDEX(page_title, '/', 1), '_', ' ')
WHERE
  page_namespace IN (2, 3)
  AND page_is_redirect = 0
  AND NOT IS_IPV4(SUBSTRING_INDEX(page_title, '/', 1))
  AND NOT IS_IPV6(SUBSTRING_INDEX(page_title, '/', 1))
  AND page_title NOT RLIKE '(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)'
  AND ISNULL(user_name);
"
    }

    async fn run_query(&self, conn: &mut Conn) -> Result<Vec<Row>> {
        let rows = conn
            .query_map(
                self.query(),
                |(page_id, page_namespace, page_title, page_len)| FirstRow {
                    page_id,
                    page_namespace,
                    page_title,
                    page_len,
                },
            )
            .await?;
        let ca_pool = self.centralauth()?;
        let mut ca_conn = ca_pool.get_conn().await?;
        let mut last = vec![];
        for row in rows {
            let username = row.page_title.replace('_', " ");
            let username = if username.contains('/') {
                let (username, _) = username.split_once('/').unwrap();
                username.to_string()
            } else {
                username
            };
            if user_exists_globally(&mut ca_conn, &username).await? {
                continue;
            }
            let Some(rev) = lookup_revision(conn, &row).await? else {
                continue;
            };
            last.push(Row {
                page_namespace: row.page_namespace,
                page_title: row.page_title,
                length: row.page_len,
                creator: rev.actor_name,
                creation_date: rev.rev_timestamp,
            })
        }
        Ok(last)
    }

    fn intro(&self) -> &'static str {
        "Pages in the user space that do not belong to a [[Special:ListUsers|registered user]]"
    }

    fn headings(&self) -> Vec<&'static str> {
        vec!["Page", "Length", "Creator", "Creation date"]
    }

    fn format_row(&self, row: &Row) -> Vec<String> {
        str_vec![
            linker(row.page_namespace, &row.page_title),
            row.length,
            row.creator,
            row.creation_date
        ]
    }

    fn code(&self) -> &'static str {
        include_str!("ownerlessuserpages.rs")
    }
}