PROWAREtech

articles » current » dot-net » extract-title-from-html

.NET: Extract the Page Title from HTML

How to find the title in an HTML page using regular expressions (RegEx) in C#.

using System;
using System.Text.RegularExpressions;

namespace ExtractTitle
{
	/// <summary>
	/// Console sample that extracts the text of the &lt;title&gt; element from an HTML
	/// string using a regular expression and prints it.
	/// </summary>
	internal class Program
	{
		// Built once and reused on every call.
		//   <title\b[^>]*>   opening tag, with optional attributes
		//   \s*              leading whitespace, kept out of the captured text
		//   (?<Title>...)    the title text, matched lazily so it stops at the FIRST closing tag
		//   \s*</title\s*>   trailing whitespace plus the closing tag ("</title >" is accepted)
		// CultureInvariant keeps the case-insensitive match independent of the current
		// culture's casing rules (e.g. the Turkish dotted/dotless I).
		private static readonly Regex TitlePattern = new Regex(
			@"<title\b[^>]*>\s*(?<Title>[\s\S]*?)\s*</title\s*>",
			RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);

		/// <summary>
		/// Returns the text of the first &lt;title&gt; element found in <paramref name="html"/>.
		/// </summary>
		/// <param name="html">The HTML markup to search; may be null or empty.</param>
		/// <returns>
		/// The title text with surrounding whitespace removed, or an empty string when
		/// <paramref name="html"/> is null/empty or contains no complete title element.
		/// Whitespace inside the title and HTML entities are returned as written.
		/// </returns>
		internal static string GetTitle(string html)
		{
			if (string.IsNullOrEmpty(html))
			{
				return string.Empty;
			}

			// A failed match yields an empty group, so Value is "" rather than null.
			return TitlePattern.Match(html).Groups["Title"].Value;
		}

		/// <summary>
		/// Entry point: extracts the title from a sample HTML document and writes it to the console.
		/// </summary>
		/// <param name="args">Command-line arguments (unused).</param>
		static void Main(string[] args)
		{
			string html = @"
<!DOCTYPE html>
<html lang=""en"">
<head>
	<meta charset=""UTF-8"">
	<meta name=""viewport"" content=""width=device-width, initial-scale=1.0"">
	<title>Hello, World</title>
</head>
<body>
	<p>
		Lorem ipsum dolor sit amet consectetur adipisicing elit.
		Cupiditate itaque autem quasi qui, culpa accusamus
		repellat sed officia reiciendis, expedita perferendis
		odit rerum minus eius quod dolores facilis optio debitis?
	</p>
</body>
</html>
";
			string title = GetTitle(html);
			Console.WriteLine(title);
		}
	}
}

This site uses cookies. Cookies are simple text files stored on the user's computer. They are used for adding features and security to this site. Read the privacy policy.
CLOSE