articles » current » dot-net » extract-title-from-html

.NET: Extract the Page Title from HTML

How to find the title in an HTML page using regular expressions (RegEx) in C#.

using System;
using System.Text.RegularExpressions;

namespace ExtractTitle
{
	/// <summary>
	/// Console sample that extracts the text of the <c>&lt;title&gt;</c> element
	/// from an HTML string using a regular expression.
	/// </summary>
	internal class Program
	{
		/// <summary>
		/// Runs the regular expression against a hard-coded HTML document and
		/// writes the captured title to standard output.
		/// </summary>
		/// <param name="args">Command-line arguments (not used).</param>
		static void Main(string[] args)
		{
			// Verbatim string literal: "" is an escaped double quote, and the
			// literal must be closed with "; before the next statement.
			string html = @"
<!DOCTYPE html>
<html lang=""en"">
<head>
	<meta charset=""UTF-8"">
	<meta name=""viewport"" content=""width=device-width, initial-scale=1.0"">
	<title>Hello, World</title>
</head>
<body>
	<p>
		Lorem ipsum dolor sit amet consectetur adipisicing elit.
		Cupiditate itaque autem quasi qui, culpa accusamus
		repellat sed officia reiciendis, expedita perferendis
		odit rerum minus eius quod dolores facilis optio debitis?
	</p>
</body>
</html>";

			// Pattern breakdown:
			//   \<title\b[^>]*\>     opening tag, with or without attributes
			//   \s*                  skip whitespace right after the opening tag
			//   (?<Title>[\s\S]*?)   lazily capture everything (newlines included)
			//   \</title\>           ...up to the first closing tag
			// IgnoreCase lets the pattern match <TITLE> as well. When nothing
			// matches, the named group's Value is an empty string.
			string title = Regex.Match(html, @"\<title\b[^>]*\>\s*(?<Title>[\s\S]*?)\</title\>", RegexOptions.IgnoreCase).Groups["Title"].Value;

			Console.WriteLine(title);
		}
	}
}

