PHP code
The sitemap.xml file is very important for SEO: search engine robots read it and, based on its content, crawl your website, making sure its pages can be found in search engines such as Google.com, Bing.com, etc.
<?php
// Database connection settings; presumably this is the content of config.php, which the generator script loads.
// NOTE(review): "root" with an empty password looks like a local development default - confirm before deploying.
$servername = "localhost";
$username = "root";
$password = "";
$database = "mydb";
// NOTE(review): no connection charset is set; the slug helper converts from UTF-8, so presumably the connection should deliver UTF-8 - confirm.
$conn = new mysqli($servername, $username, $password, $database); // Opens the connection; the generator script checks $conn->connect_error before using it
?>
<?php
////////////////////////////////// DB connection check and base URL / XML setup //////////////////////////////////
require_once('config.php'); // provides $conn (mysqli connection) and the DB credentials
if ($conn->connect_error) { // stop early when the database is unreachable
    die("Connection failed: " . $conn->connect_error);
}
set_time_limit(7200); // 120 minutes - keeps a long generation run from hitting the execution time limit; can be adjusted

// $_SERVER['HTTPS'] is documented as "non-empty when HTTPS is used"; some servers report "off" for plain HTTP,
// and others use values such as "On" or "1", so an exact comparison with 'on' is too strict.
$protocol = (!empty($_SERVER['HTTPS']) && strtolower($_SERVER['HTTPS']) !== 'off') ? "https://" : "http://";

// NOTE(review): HTTP_HOST is taken from the request's Host header and is therefore client-controlled;
// consider a configured canonical host name, because the value is written into sitemap.xml.
if (isset($_SERVER['HTTP_HOST'])) {
    $domainName = $_SERVER['HTTP_HOST']; // e.g. www.mydomain.com
} elseif (isset($_SERVER['SERVER_NAME'])) {
    $domainName = $_SERVER['SERVER_NAME'];
} else {
    $domainName = 'localhost'; // no request context available (e.g. command-line run)
}

// Directory part of the requested path only: the query string is dropped first so that slashes inside it
// cannot confuse dirname().
$requestPath = isset($_SERVER['REQUEST_URI']) ? (string) parse_url($_SERVER['REQUEST_URI'], PHP_URL_PATH) : '';
// dirname() yields "/" (or "\" on Windows) for a script in the web root; trimming the trailing slash avoids
// a doubled "//" once "/<slug>" is appended to $fullURL.
$basePath = rtrim(str_replace('\\', '/', dirname($requestPath)), '/'); // e.g. /category, or '' for the web root
if ($basePath === '.') { // dirname() result for a path without any directory part
    $basePath = '';
}

$fullURL = $protocol . $domainName . $basePath; // e.g. https://www.mydomain.com/category
$xml = new SimpleXMLElement('<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"></urlset>'); // root element of the sitemap
////////////////////////////////// DB connection check and base URL / XML setup //////////////////////////////////
/**
 * Converts a title into a lower-case, ASCII-only URL slug.
 *
 * - Czech accented letters are replaced by their unaccented base letter.
 * - Spaces, "_", "=" and arrow symbols become "-"; existing "-" characters are kept.
 * - "%" is spelled out as "percentage".
 * - Punctuation and the listed symbols are removed.
 * - Runs of "-" are collapsed into a single "-".
 *
 * Characters that are not in the table are transliterated with iconv(); when iconv() cannot
 * convert the text, the remaining non-ASCII bytes are dropped instead of returning an empty slug.
 *
 * @param string $str Title text, expected to be UTF-8 encoded.
 * @return string Slug for use as a URL path segment.
 */
function remove_accents_and_keep_minuses($str) {
    // The lookup table is constant, so it is built once and reused across calls.
    static $map = null;
    if ($map === null) {
        // Accented letter => base letter (lower-case "ď" included, consistent with upper-case "Ď").
        $accents = array(
            'Á' => 'A', 'Č' => 'C', 'Ď' => 'D', 'É' => 'E', 'Ě' => 'E', 'Í' => 'I', 'Ň' => 'N', 'Ó' => 'O',
            'Ř' => 'R', 'Š' => 'S', 'Ť' => 'T', 'Ú' => 'U', 'Ů' => 'U', 'Ý' => 'Y', 'Ž' => 'Z',
            'á' => 'a', 'č' => 'c', 'ď' => 'd', 'é' => 'e', 'ě' => 'e', 'í' => 'i', 'ň' => 'n', 'ó' => 'o',
            'ř' => 'r', 'š' => 's', 'ť' => 't', 'ú' => 'u', 'ů' => 'u', 'ý' => 'y', 'ž' => 'z',
        );
        // Characters that act as word separators in the slug.
        $dashed = array(' ', '_', '=', '←', '→', '↑', '↓');
        // Characters that are removed without a replacement.
        $dropped = array(
            '\'', '{', '}', '[', ']', '+', '@', '#', '$', '^', '&', '*', '(', ')', '~', '`', ';', '°', '|',
            '.', ',', '\\', '?', '/', '<', '>', '×', '÷', '€', '£', '¥', '†', '§', '©', '®', '™', '…',
        );
        $map = $accents
            + array_fill_keys($dashed, '-')
            + array_fill_keys($dropped, '')
            + array('%' => 'percentage');
    }

    // strtr() with an array replaces every key in a single pass, so no replacement is re-scanned.
    $slug = strtr((string) $str, $map);

    // Only text that still contains non-ASCII bytes needs transliteration.
    if (preg_match('/[^\x00-\x7F]/', $slug)) {
        $ascii = iconv('UTF-8', 'ASCII//TRANSLIT', $slug);
        if ($ascii === false) {
            // iconv() could not convert the text: drop the non-ASCII bytes rather than lose the whole slug.
            $ascii = preg_replace('/[^\x00-\x7F]+/', '', $slug);
        }
        $slug = $ascii;
    }

    // Lower-casing happens after transliteration so letters produced by iconv() are lowered as well
    // (protects against duplicate addresses that differ only in case).
    $slug = strtolower($slug);
    $slug = str_replace(' ', '-', $slug); // spaces that transliteration may have introduced
    return preg_replace('/-+/', '-', $slug); // collapse repeated minuses into one
}
// Reads the page titles and writes one <url> entry per title into sitemap.xml.
// The sitemap protocol allows at most 50,000 URLs per sitemap file, hence the LIMIT.
$sql = "SELECT lower(`title`) AS `url_address` FROM `mytable` ORDER BY `id` DESC LIMIT 50000;";
$result = $conn->query($sql);
if ($result === false) { // query() returns false on failure when mysqli is not in exception mode
    $queryError = $conn->error;
    $conn->close();
    die("Query failed: " . $queryError);
}
if ($result->num_rows > 0) {
    $today = date("Y-m-d"); // current date; the page's creation/modification date would be more accurate
    while ($row = $result->fetch_assoc()) { // one <url> element per row
        $url = $xml->addChild('url');
        // The column alias in the SELECT above is `url_address`.
        $url->addChild('loc', $fullURL . "/" . remove_accents_and_keep_minuses($row['url_address'])); // full URL address
        // Child elements are added in the order the sitemap protocol lists them: loc, lastmod, changefreq, priority.
        $url->addChild('lastmod', $today);
        $url->addChild('changefreq', 'weekly'); // expected change frequency of the page
        $url->addChild('priority', '0.8'); // relative priority hint for the robots
    }
    // Write the file once, after all entries were added, instead of rewriting it for every row.
    if ($xml->asXML('sitemap.xml') === false) {
        echo "Sitemap file could not be written.<br />\r\n";
    } else {
        ////////////////////////////////// can be commented out or removed after development //////////////////////////////////
        echo "Sitemap file was generated properly.<br />\r\n";
    }
} else {
    ////////////////////////////////// can be commented out or removed after development //////////////////////////////////
    echo "No URL addresses were found in the SQL Select.";
}
$result->free(); // release the result set
$conn->close(); // close the MySQL connection
?>