简介
在处理身份管理问题时,管理员常常面临重新格式化用户履历资料的难题。当用户的姓名和头衔以不区分大小写的格式存储而不符合要求时,身份管理管理员必须对这些姓名重新进行格式化。然而,仅仅把姓氏和头衔的首字母大写并不够,因为非标准的姓名拼写和缩略语会带来困难。
该解决方案解决什么问题
对用户的履历资料或任何其他字符串进行“正确大小写”(proper case)格式化。
这是如何帮助别人?
系统管理员和身份管理专业人员可以使用此格式提供程序来处理存储在不区分大小写的数据源中的数据。
使用代码
要使用此格式提供程序,用户应将 LafiProperCaseFormatProvider(Lost and Found Identity 正确大小写格式提供程序)包含到自己的项目中,并按以下方式使用:
string improper = " MRs. De'MArLeY-Smith mccarthy IV Sr. PhD ";
不使用 McOption:
string result = string.Format(new LafiProperCaseFormatProvider(), "{0:p}", improper);
结果:
Mrs. De'Marley-Smith Mccarthy IV Sr. PhD
使用 McOption:
string result = string.Format(new LafiProperCaseFormatProvider(), "{0:mc}", improper);
结果:
Mrs. De'Marley-Smith McCarthy IV Sr. PhD
代码实际上是如何工作的
LafiProperCaseFormatProvider(Lost and Found Identity 正确大小写格式提供程序)是 IFormatProvider 接口的一个实现。
格式提供程序先按“空格”字符拆分字符串,删除所有多余的空白,并应用几种特殊大小写规则的模式(罗马数字、称谓、博士之类的头衔等);此后,再按“连字符”和“撇号”拆分字符串,以确保复姓以及中间带撇号的复合词得到正确的大小写。
如果用户指定了格式“m”或“mc”(McOption),字符串将按爱尔兰/苏格兰姓名的模式进行分析。此选项特别棘手,因为它可能对非爱尔兰/苏格兰的姓名产生不良的结果。请比较“MacDonald”与“Macado”:格式提供程序会把马查多这样的姓名大写为“MaChado”,这通常是不可取的。为了解决这个问题,在身份管理项目中,你应该使用属性流优先级和一个专用的数据源,其中包含大小写的例外情况。(见我的博客:{A1})
注意:
这段代码是针对身份管理范围内的正确大小写格式化而设计和测试的。将此格式提供程序应用于一般文本可能会产生不良后果。
//-----------------------------------------------------------------------
// <copyright file="LafiProperCaseFormatProvider.cs" company="LostAndFoundIdentity">
// Copyright (c) 2007 LostAndFoundIdentity.com | All rights reserved.
// </copyright>
// <author> Dmitry Kazantsev </author>
//-----------------------------------------------------------------------
[assembly: System.CLSCompliant(true)]
namespace LostAndFoundIdentity.Text
{
using System;
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using System.Globalization;
using System.Text;
using System.Text.RegularExpressions;
/// <summary>
/// Represents Lost and Found Identity class LafiProperCaseFormatProvider
/// which is responsible for providing custom proper case string formatting.
/// </summary>
/// <remarks>
/// Recognized format specifiers (case-insensitive) are "p" (proper case) and
/// "m", "mc" or "mac" (proper case with Mc/Mac handling), e.g.
/// <c>string.Format(new LafiProperCaseFormatProvider(), "{0:mc}", name)</c>.
/// The Mc/Mac option is stored on the instance for the duration of a call,
/// so a single instance should not be shared between concurrent callers.
/// </remarks>
[SuppressMessage("Microsoft.Naming",
    "CA1704:IdentifiersShouldBeSpelledCorrectly",
    MessageId = "Lafi",
    Justification = "Nothing wrong with 'Lafi'; It is stands for Lost and Found Identity")]
public class LafiProperCaseFormatProvider : ICustomFormatter, IFormatProvider
{
    #region Fields

    /// <summary>
    /// String representing space character
    /// </summary>
    private const string Space = " ";

    /// <summary>
    /// Order in which patterns are tested against a token. The order matters
    /// because several patterns overlap (for example "mcp" matches both the
    /// all-upper-case and the Mc/Mac pattern), and Dictionary enumeration
    /// order is not guaranteed, so the priority is spelled out explicitly.
    /// </summary>
    private static readonly Pattern[] DetectionOrder = new Pattern[]
    {
        Pattern.AllUpperCase,
        Pattern.FirstAndLastCapitals,
        Pattern.McAndMac,
        Pattern.RomanNumerals,
        Pattern.Salutation
    };

    /// <summary>
    /// Boolean containing value representing user's desire
    /// to look for and format strings with Mc/Mac formatting algorithm
    /// </summary>
    private bool mcOption;

    /// <summary>
    /// Dictionary containing pattern name and regex pattern
    /// </summary>
    private Dictionary<Pattern, string> patternDictionary;

    #endregion Fields

    #region Constructors

    /// <summary>
    /// Initializes a new instance of the LafiProperCaseFormatProvider class
    /// </summary>
    public LafiProperCaseFormatProvider()
    {
        this.InitializeDictionary();
    }

    #endregion Constructors

    #region Enums

    /// <summary>
    /// Name of the pattern that could be present in the string
    /// </summary>
    private enum Pattern
    {
        /// <summary>
        /// No pattern found
        /// </summary>
        None = 0,

        /// <summary>
        /// Represents pattern where all letters must be capitalized
        /// </summary>
        AllUpperCase = 1,

        /// <summary>
        /// Represents pattern where first and last
        /// letter of the word must be capitalized
        /// </summary>
        FirstAndLastCapitals = 2,

        /// <summary>
        /// Represents pattern where Mc and Mac must be distinguished from
        /// the rest of the word by capitalizing character following the Mc or Mac
        /// </summary>
        McAndMac = 8,

        /// <summary>
        /// Represents patterns where string is a Roman Numeral
        /// </summary>
        RomanNumerals = 16,

        /// <summary>
        /// Represents pattern where string is a salutation
        /// </summary>
        Salutation = 32
    }

    #endregion Enums

    #region Properties

    /// <summary>
    /// Gets or sets a value indicating whether user wishes
    /// to look for Mc/Mac in the formatted string or not
    /// </summary>
    private bool McOption
    {
        get
        {
            return this.mcOption;
        }

        set
        {
            this.mcOption = value;
        }
    }

    /// <summary>
    /// Gets the Dictionary containing pattern name
    /// and correlated RegEx pattern "formula"
    /// </summary>
    private Dictionary<Pattern, string> PatternDictionary
    {
        get
        {
            return this.patternDictionary;
        }
    }

    #endregion Properties

    #region Interface implementation

    /// <summary>
    /// Formats provided value with a pre-defined template
    /// </summary>
    /// <param name="format">Name of the format presented as {0:x}; may be null when no specifier is given</param>
    /// <param name="arg">Value to be formatted</param>
    /// <param name="formatProvider">The format provider class</param>
    /// <returns>
    /// Proper-cased string for the "p", "m", "mc" and "mac" specifiers; an empty
    /// string when <paramref name="arg"/> is null; otherwise the default
    /// string representation of <paramref name="arg"/>
    /// </returns>
    public string Format(string format, object arg, IFormatProvider formatProvider)
    {
        if (arg == null)
        {
            return string.Empty;
        }

        // string.Format passes a null format for a plain "{0}" placeholder
        string specifier = format == null ? string.Empty : format.ToUpperInvariant();

        switch (specifier)
        {
            case "M":
            case "MAC":
            case "MC":
                {
                    this.McOption = true;
                    return this.FormatProperCase(arg.ToString());
                }

            case "P":
                {
                    this.McOption = false;
                    return this.FormatProperCase(arg.ToString());
                }

            default:
                {
                    // Not one of our specifiers: let the argument format itself
                    // so that e.g. numeric or date specifiers are still honored
                    IFormattable formattable = arg as IFormattable;
                    if (formattable != null)
                    {
                        return formattable.ToString(format, CultureInfo.CurrentCulture);
                    }

                    return arg.ToString();
                }
        }
    }

    /// <summary>
    /// Gets the formatter object for the requested format type
    /// </summary>
    /// <param name="formatType">Format type in question</param>
    /// <returns>This instance when ICustomFormatter is requested; otherwise null</returns>
    public object GetFormat(Type formatType)
    {
        if (formatType == typeof(ICustomFormatter))
        {
            return this;
        }

        return null;
    }

    #endregion

    #region Methods

    /// <summary>
    /// Strips leading and trailing white-space and collapses every inner
    /// run of white-space characters into a single space
    /// </summary>
    /// <param name="value">String to be processed</param>
    /// <returns>Reformatted string without redundant whitespace</returns>
    private static string ProcessWhitespace(string value)
    {
        return Regex.Replace(value.Trim(), @"\s+", Space);
    }

    /// <summary>
    /// Upper-cases the first character of the token and leaves the rest untouched
    /// </summary>
    /// <param name="token">Token to be capitalized</param>
    /// <returns>Token with its first character in upper case</returns>
    private static string CapitalizeFirst(string token)
    {
        if (string.IsNullOrEmpty(token))
        {
            return token;
        }

        return token.Substring(0, 1).ToUpperInvariant() + token.Substring(1);
    }

    /// <summary>
    /// Determines which RegEx pattern is applicable for a given string
    /// </summary>
    /// <param name="value">The string to be examined</param>
    /// <returns>
    /// The Enum value of the first pattern (in detection order) matching
    /// the string, or Pattern.None when nothing matches
    /// </returns>
    private Pattern DetectPattern(string value)
    {
        foreach (Pattern candidate in DetectionOrder)
        {
            if (Regex.IsMatch(
                value,
                this.PatternDictionary[candidate],
                RegexOptions.IgnoreCase | RegexOptions.CultureInvariant))
            {
                return candidate;
            }
        }

        return Pattern.None;
    }

    /// <summary>
    /// Reformats provided value into properly capitalized string
    /// </summary>
    /// <param name="value">String to be formatted</param>
    /// <returns>
    /// Properly capitalized string; an empty string when the value
    /// is null, empty or consists of white-space only
    /// </returns>
    [SuppressMessage("Microsoft.Globalization",
        "CA1308:NormalizeStringsToUppercase",
        Justification = "By design")]
    private string FormatProperCase(string value)
    {
        if (string.IsNullOrEmpty(value))
        {
            return string.Empty;
        }

        value = ProcessWhitespace(value);
        if (value.Length == 0)
        {
            return string.Empty;
        }

        StringBuilder output = new StringBuilder();

        //// Process each word (separated by a single space)
        foreach (string token in value.ToLowerInvariant().Split(' '))
        {
            if (token.Length == 0)
            {
                continue;
            }

            string tempToken;
            switch (this.DetectPattern(token))
            {
                case Pattern.FirstAndLastCapitals:
                    {
                        //// The pattern is anchored, so the whole (lower-case) token is
                        //// "phd"/"legd" with optional punctuation; upper-casing
                        //// p, l and d yields "PhD" / "LegD"
                        tempToken = token.Replace("p", "P").Replace("l", "L").Replace("d", "D");
                        break;
                    }

                case Pattern.RomanNumerals:
                case Pattern.AllUpperCase:
                    {
                        tempToken = token.ToUpperInvariant();
                        break;
                    }

                case Pattern.McAndMac:
                    {
                        if (this.McOption)
                        {
                            //// Group 1 is the "mc"/"mac" prefix, group 2 the remainder;
                            //// both get their first character capitalized
                            Match matchedToken = Regex.Match(
                                token,
                                this.PatternDictionary[Pattern.McAndMac],
                                RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
                            tempToken = CapitalizeFirst(matchedToken.Groups[1].Value) +
                                CapitalizeFirst(matchedToken.Groups[2].Value);
                        }
                        else
                        {
                            tempToken = CapitalizeFirst(token);
                        }

                        break;
                    }

                default:
                    {
                        //// Salutations and plain words: capitalize first character only
                        tempToken = CapitalizeFirst(token);
                        break;
                    }
            }

            //// A hyphen or an apostrophe marks a "separated" token whose
            //// parts must each be proper-cased on their own
            if (token.IndexOf('-') > -1)
            {
                tempToken = this.FormatSeparatedValue(token, '-');
            }

            if (token.IndexOf('\'') > -1)
            {
                tempToken = this.FormatSeparatedValue(token, '\'');
            }

            if (output.Length > 0)
            {
                output.Append(Space);
            }

            output.Append(tempToken);
        }

        return output.ToString();
    }

    /// <summary>
    /// Formats "separated" string to ensure that hyphenated
    /// and apostrophe-separated strings are properly capitalized
    /// </summary>
    /// <param name="value">Value to be processed</param>
    /// <param name="separator">A separator character</param>
    /// <returns>Properly formatted "separated" string</returns>
    private string FormatSeparatedValue(string value, char separator)
    {
        string[] parts = value.Split(separator);
        StringBuilder result = new StringBuilder();

        for (int i = 0; i < parts.Length; i++)
        {
            //// Empty parts come from leading, trailing or doubled separators
            if (parts[i].Length > 0)
            {
                result.Append(this.FormatProperCase(parts[i]));
            }

            //// Every part except the last one is followed by the separator
            if (i < parts.Length - 1)
            {
                result.Append(separator);
            }
        }

        return result.ToString();
    }

    /// <summary>
    /// Initializes dictionary of pattern names and regex "formulas"
    /// </summary>
    private void InitializeDictionary()
    {
        //// Salutations and suffixes: Mr, Ms, Mrs, Miss, Jr, Sr (optional period)
        string salutations =
            @"(^m(r|s)\.?$)|(^mrs\.?$)|(^mi(s){2}\.?$)|(^(j|s)r\.?,?$)";

        //// PhD or LegD and any variants with periods
        string firstLastCap = @"(^leg\.?d\.?,?$)|(^ph\.?d\.?,?$)";

        //// Degrees and professional designations that must be in all caps:
        //// MVP and MCP, DSC, CNA, CCNA and CCNP, MCSE and MCSA and MCSD,
        //// CISM and CISA, DDS, RN, MD and OD, BA and MA, CISSP.
        //// The CISA/CISM and DDS alternatives accept a trailing period
        //// and/or comma like all the others.
        string allUpperCase =
            @"(^m(v|c)p\,?\.?$)|" +
            @"(^dsc\.?\,?$)|" +
            @"(^cna\.?\,?$)|" +
            @"(^c{2}n(a|p)\.?\,?$)|" +
            @"(^mcs[ead]\.?\,?$)|" +
            @"(^cis(a|m)\.?\,?$)|" +
            @"(^d{2}s\.?\,?$)|" +
            @"(^rn\.?\,?$)|" +
            @"(^(m|o)\.?d\.?\,?$)|" +
            @"(^(b|m)\.?a\.?\,?$)|" +
            @"(^cis{2}p\.?\,?$)";

        //// The Mc's and Mac's of the world, but NOT MCSE, MCSD or MCSA;
        //// a negative look-ahead rules out those possibilities
        string mcAndMac = @"^(ma?c)(?!s[ead]$)((.+))$";

        //// Roman numerals, optionally followed by a comma
        string romanNumerals = @"^((?=[MDCLXVI])((M{0,3})((C[DM])|(D?" +
            @"C{0,3}))?((X[LC])|(L?XX{0,2})|L)?((I[VX])|(V?(II{0,2}))|V)?)),?$";

        this.patternDictionary = new Dictionary<Pattern, string>();
        this.patternDictionary.Add(Pattern.AllUpperCase, allUpperCase);
        this.patternDictionary.Add(Pattern.FirstAndLastCapitals, firstLastCap);
        this.patternDictionary.Add(Pattern.McAndMac, mcAndMac);
        this.patternDictionary.Add(Pattern.RomanNumerals, romanNumerals);
        this.patternDictionary.Add(Pattern.Salutation, salutations);
    }

    #endregion Methods
}
}