<html xmlns:v="urn:schemas-microsoft-com:vml" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns="http://www.w3.org/TR/REC-html40">
<head>
<meta http-equiv=Content-Type content="text/html; charset=iso-8859-1">
<meta name=Generator content="Microsoft Word 11 (filtered medium)">
<title>Re: [OpenID] Canonical OpenID url form</title>
<!--[if !mso]>
<style>
v\:* {behavior:url(#default#VML);}
o\:* {behavior:url(#default#VML);}
w\:* {behavior:url(#default#VML);}
.shape {behavior:url(#default#VML);}
</style>
<![endif]-->
<style>
<!--
/* Font Definitions */
@font-face
        {font-family:Tahoma;
        panose-1:2 11 6 4 3 5 4 4 2 4;}
/* Style Definitions */
p.MsoNormal, li.MsoNormal, div.MsoNormal
        {margin:0pt;
        margin-bottom:.0001pt;
        font-size:12.0pt;
        font-family:"Times New Roman";}
h1
        {margin-top:12.0pt;
        margin-right:0pt;
        margin-bottom:3.0pt;
        margin-left:0pt;
        page-break-after:avoid;
        font-size:16.0pt;
        font-family:Arial;}
h2
        {margin-top:12.0pt;
        margin-right:0pt;
        margin-bottom:3.0pt;
        margin-left:0pt;
        page-break-after:avoid;
        font-size:14.0pt;
        font-family:Arial;
        font-style:italic;}
h3
        {margin-top:12.0pt;
        margin-right:0pt;
        margin-bottom:3.0pt;
        margin-left:0pt;
        page-break-after:avoid;
        font-size:12.0pt;
        font-family:Arial;}
h4
        {margin-top:12.0pt;
        margin-right:0pt;
        margin-bottom:3.0pt;
        margin-left:0pt;
        page-break-after:avoid;
        font-size:10.0pt;
        font-family:"Times New Roman";
        font-style:italic;}
p.MsoHeader, li.MsoHeader, div.MsoHeader
        {margin:0pt;
        margin-bottom:.0001pt;
        border:none;
        padding:0pt;
        font-size:10.0pt;
        font-family:Arial;}
p.MsoFooter, li.MsoFooter, div.MsoFooter
        {margin:0pt;
        margin-bottom:.0001pt;
        border:none;
        padding:0pt;
        font-size:10.0pt;
        font-family:Arial;}
p.MsoTitle, li.MsoTitle, div.MsoTitle
        {margin-top:0pt;
        margin-right:0pt;
        margin-bottom:9.0pt;
        margin-left:0pt;
        text-align:center;
        font-size:16.0pt;
        font-family:Arial;
        font-weight:bold;}
p.MsoBodyText, li.MsoBodyText, div.MsoBodyText
        {margin-top:0pt;
        margin-right:0pt;
        margin-bottom:6.0pt;
        margin-left:0pt;
        font-size:12.0pt;
        font-family:"Times New Roman";}
p.MsoSubtitle, li.MsoSubtitle, div.MsoSubtitle
        {margin-top:0pt;
        margin-right:0pt;
        margin-bottom:18.0pt;
        margin-left:0pt;
        text-align:center;
        font-size:12.0pt;
        font-family:Arial;}
a:link, span.MsoHyperlink
        {color:blue;
        text-decoration:underline;}
a:visited, span.MsoHyperlinkFollowed
        {color:blue;
        text-decoration:underline;}
p.Quote, li.Quote, div.Quote
        {margin-top:0pt;
        margin-right:36.0pt;
        margin-bottom:6.0pt;
        margin-left:36.0pt;
        font-size:12.0pt;
        font-family:"Times New Roman";
        font-style:italic;}
p.Wiki, li.Wiki, div.Wiki
        {margin:0pt;
        margin-bottom:.0001pt;
        font-size:10.0pt;
        font-family:"Courier New";}
p.Graphic, li.Graphic, div.Graphic
        {margin-top:0pt;
        margin-right:0pt;
        margin-bottom:6.0pt;
        margin-left:0pt;
        text-align:center;
        font-size:10.0pt;
        font-family:Arial;
        font-style:italic;}
span.EmailStyle26
        {mso-style-type:personal-reply;
        font-family:Arial;
        color:navy;}
/* Page Definitions */
@page
        {mso-endnote-separator:url("cid:header.htm\@01C8E2D6.4C684640") es;
        mso-endnote-continuation-separator:url("cid:header.htm\@01C8E2D6.4C684640") ecs;}
@page Section1
        {size:612.0pt 792.0pt;
        margin:72.0pt 90.0pt 72.0pt 90.0pt;}
div.Section1
        {page:Section1;}
/* List Definitions */
@list l0
        {mso-list-id:-132;
        mso-list-type:simple;
        mso-list-template-ids:-1328661930;}
@list l0:level1
        {mso-level-tab-stop:90.0pt;
        mso-level-number-position:left;
        margin-left:90.0pt;
        text-indent:-18.0pt;}
@list l1
        {mso-list-id:-131;
        mso-list-type:simple;
        mso-list-template-ids:-909054546;}
@list l1:level1
        {mso-level-tab-stop:72.0pt;
        mso-level-number-position:left;
        margin-left:72.0pt;
        text-indent:-18.0pt;}
@list l2
        {mso-list-id:-130;
        mso-list-type:simple;
        mso-list-template-ids:531935922;}
@list l2:level1
        {mso-level-tab-stop:54.0pt;
        mso-level-number-position:left;
        margin-left:54.0pt;
        text-indent:-18.0pt;}
@list l3
        {mso-list-id:-129;
        mso-list-type:simple;
        mso-list-template-ids:2046339550;}
@list l3:level1
        {mso-level-tab-stop:36.0pt;
        mso-level-number-position:left;
        text-indent:-18.0pt;}
@list l4
        {mso-list-id:-128;
        mso-list-type:simple;
        mso-list-template-ids:82112870;}
@list l4:level1
        {mso-level-number-format:bullet;
        mso-level-text:\F0B7;
        mso-level-tab-stop:90.0pt;
        mso-level-number-position:left;
        margin-left:90.0pt;
        text-indent:-18.0pt;
        font-family:Symbol;}
@list l5
        {mso-list-id:-127;
        mso-list-type:simple;
        mso-list-template-ids:-1405587484;}
@list l5:level1
        {mso-level-number-format:bullet;
        mso-level-text:\F0B7;
        mso-level-tab-stop:72.0pt;
        mso-level-number-position:left;
        margin-left:72.0pt;
        text-indent:-18.0pt;
        font-family:Symbol;}
@list l6
        {mso-list-id:-126;
        mso-list-type:simple;
        mso-list-template-ids:828961842;}
@list l6:level1
        {mso-level-number-format:bullet;
        mso-level-text:\F0B7;
        mso-level-tab-stop:54.0pt;
        mso-level-number-position:left;
        margin-left:54.0pt;
        text-indent:-18.0pt;
        font-family:Symbol;}
@list l7
        {mso-list-id:-125;
        mso-list-type:simple;
        mso-list-template-ids:1053828088;}
@list l7:level1
        {mso-level-number-format:bullet;
        mso-level-text:\F0B7;
        mso-level-tab-stop:36.0pt;
        mso-level-number-position:left;
        text-indent:-18.0pt;
        font-family:Symbol;}
@list l8
        {mso-list-id:-120;
        mso-list-type:simple;
        mso-list-template-ids:-2021464228;}
@list l8:level1
        {mso-level-tab-stop:18.0pt;
        mso-level-number-position:left;
        margin-left:18.0pt;
        text-indent:-18.0pt;}
@list l9
        {mso-list-id:-119;
        mso-list-type:simple;
        mso-list-template-ids:445916746;}
@list l9:level1
        {mso-level-number-format:bullet;
        mso-level-text:\F0B7;
        mso-level-tab-stop:18.0pt;
        mso-level-number-position:left;
        margin-left:18.0pt;
        text-indent:-18.0pt;
        font-family:Symbol;}
ol
        {margin-bottom:0pt;}
ul
        {margin-bottom:0pt;}
-->
</style>
<!--[if gte mso 9]><xml>
<o:shapedefaults v:ext="edit" spidmax="1026" />
</xml><![endif]--><!--[if gte mso 9]><xml>
<o:shapelayout v:ext="edit">
<o:idmap v:ext="edit" data="1" />
</o:shapelayout></xml><![endif]-->
</head>
<body lang=EN-US link=blue vlink=blue>
<div class=Section1>
<p class=MsoNormal><font size=2 color=navy face=Arial><span style='font-size:
10.0pt;font-family:Arial;color:navy'>My apologies, I forgot to clarify that the
XRI specifications require that when an XRI is transformed into an HTTP(S) URI (called
an HXRI in the spec), it must be transformed into URI-normal form as defined
in the XRI Syntax 2.0 spec [1]. That transformation (described earlier in this
thread) involves a simple mechanical transformation into IRI-normal form, then
following the IRI spec (RFC 3987) to apply the percent-encoding of Unicode
characters.<o:p></o:p></span></font></p>
<p class=MsoNormal><font size=2 color=navy face=Arial><span style='font-size:
10.0pt;font-family:Arial;color:navy'><o:p> </o:p></span></font></p>
<p class=MsoNormal><font size=2 color=navy face=Arial><span style='font-size:
10.0pt;font-family:Arial;color:navy'>=Drummond <o:p></o:p></span></font></p>
<p class=MsoNormal><font size=2 color=navy face=Arial><span style='font-size:
10.0pt;font-family:Arial;color:navy'><o:p> </o:p></span></font></p>
<p class=MsoNormal><font size=2 color=navy face=Arial><span style='font-size:
10.0pt;font-family:Arial;color:navy'>[1] <a
href="http://docs.oasis-open.org/xri/xri-syntax/2.0/specs/cs01/xri-syntax-V2.0-cs.html">http://docs.oasis-open.org/xri/xri-syntax/2.0/specs/cs01/xri-syntax-V2.0-cs.html</a>
<o:p></o:p></span></font></p>
<p class=MsoNormal><font size=2 color=navy face=Arial><span style='font-size:
10.0pt;font-family:Arial;color:navy'><o:p> </o:p></span></font></p>
<div style='border:none;border-left:solid blue 1.5pt;padding:0pt 0pt 0pt 4.0pt'>
<div>
<div class=MsoNormal align=center style='text-align:center'><font size=3
face="Times New Roman"><span style='font-size:12.0pt'>
<hr size=2 width="100%" align=center tabindex=-1>
</span></font></div>
<p class=MsoNormal><b><font size=2 face=Tahoma><span style='font-size:10.0pt;
font-family:Tahoma;font-weight:bold'>From:</span></font></b><font size=2
face=Tahoma><span style='font-size:10.0pt;font-family:Tahoma'> Andrew Arnott
[mailto:andrewarnott@gmail.com] <br>
<b><span style='font-weight:bold'>Sent:</span></b> Thursday, July 10, 2008 1:02
PM<br>
<b><span style='font-weight:bold'>To:</span></b> Drummond Reed<br>
<b><span style='font-weight:bold'>Cc:</span></b> Peter Williams; Johnny Bufu;
OpenID List<br>
<b><span style='font-weight:bold'>Subject:</span></b> Re: [OpenID] Canonical
OpenID url form</span></font><o:p></o:p></p>
</div>
<p class=MsoNormal><font size=3 face="Times New Roman"><span style='font-size:
12.0pt'><o:p> </o:p></span></font></p>
<p class=MsoNormal style='margin-bottom:12.0pt'><font size=3
face="Times New Roman"><span style='font-size:12.0pt'>If XRIs allow unicode
characters and URIs do not, then prefixing <a href="http://xri.net/">http://xri.net/</a>
in front of an XRI does <i><span style='font-style:italic'>not</span></i>
guarantee a proper URI. It merely makes it look like one. But if
foreign characters exist in the XRI, they must be properly % encoded for the
result to be a proper URI.<o:p></o:p></span></font></p>
<div>
<p class=MsoNormal><font size=3 face="Times New Roman"><span style='font-size:
12.0pt'>On Thu, Jul 10, 2008 at 11:31 AM, Drummond Reed &lt;<a
href="mailto:drummond.reed@cordance.net">drummond.reed@cordance.net</a>&gt;
wrote:<o:p></o:p></span></font></p>
<p class=MsoNormal><font size=3 face="Times New Roman"><span style='font-size:
12.0pt'>Martin's right, Peter -- XRI is one option for Unicode. But you can
also use<br>
an internationalized domain name<br>
(<a href="http://en.wikipedia.org/wiki/Internationalized_domain_name"
target="_blank">http://en.wikipedia.org/wiki/Internationalized_domain_name</a>)
in a regular<br>
URL. It uses Punycode (<a href="http://en.wikipedia.org/wiki/Punycode"
target="_blank">http://en.wikipedia.org/wiki/Punycode</a>).<br>
<br>
You can also turn an XRI into an URL by adding an XRI proxy resolver prefix<br>
(such as <a href="http://xri.net/" target="_blank">http://xri.net/</a> -- see
my sig below for an example). In that<br>
approach the proxy resolver prefix has nothing to do with the XRI itself, so<br>
there's no need to internationalize the domain name.<br>
<br>
=Drummond<br>
<a href="http://xri.net/=drummond.reed" target="_blank">http://xri.net/=drummond.reed</a><o:p></o:p></span></font></p>
<div>
<p class=MsoNormal><font size=3 face="Times New Roman"><span style='font-size:
12.0pt'><br>
<br>
> -----Original Message-----<br>
> From: Peter Williams [mailto:<a href="mailto:pwilliams@rapattoni.com">pwilliams@rapattoni.com</a>]<br>
> Sent: Wednesday, July 09, 2008 11:40 PM<br>
> To: Drummond Reed; 'Johnny Bufu'; 'Andrew Arnott'<br>
> Cc: 'OpenID List'<o:p></o:p></span></font></p>
</div>
<div>
<div>
<p class=MsoNormal style='margin-bottom:12.0pt'><font size=3
face="Times New Roman"><span style='font-size:12.0pt'>> Subject: RE:
[OpenID] Canonical OpenID url form<br>
><br>
> So the short form of the story is: use xri for unicode (and then transform<br>
> the xri into an https hxri).<br>
><br>
> It's been a month since I studied xri (and thus have forgotten 80 percent<br>
> of it). I recall there was a syntax to identify the address of the initial<br>
> resolver. Is there a way that this became the domain name component of the<br>
> hxri?<br>
><br>
> -----Original Message-----<br>
> From: Drummond Reed &lt;<a href="mailto:drummond.reed@cordance.net">drummond.reed@cordance.net</a>&gt;<br>
> Sent: Wednesday, July 09, 2008 11:34 PM<br>
> To: 'Johnny Bufu' &lt;<a href="mailto:johnny.bufu@gmail.com">johnny.bufu@gmail.com</a>&gt;;
'Andrew Arnott'<br>
> &lt;<a href="mailto:andrewarnott@gmail.com">andrewarnott@gmail.com</a>&gt;<br>
> Cc: 'OpenID List' &lt;<a href="mailto:general@openid.net">general@openid.net</a>&gt;<br>
> Subject: Re: [OpenID] Canonical OpenID url form<br>
><br>
><br>
> Also for the record, XRIs (which use the IRI character set) have a very<br>
> simple defined transformation into IRIs. Thus when an XRI needs to be sent<br>
> over-the-wire in an HTTP(S) URI, it must first be transformed into an IRI,<br>
> then you follow the IRI spec (RFC 3987) to transform into a URI as Johnny<br>
> describes below. Reverse the process to display back to the user.<br>
><br>
> See<br>
> <a
href="http://docs.oasis-open.org/xri/xri-syntax/2.0/specs/cs01/xri-syntax-V2.0-"
target="_blank">http://docs.oasis-open.org/xri/xri-syntax/2.0/specs/cs01/xri-syntax-V2.0-</a><br>
> cs.<br>
> html for all the gory details (and they are gory - Unicode is hard).<br>
><br>
> =Drummond<br>
><br>
> > -----Original Message-----<br>
> > From: <a href="mailto:general-bounces@openid.net">general-bounces@openid.net</a>
[mailto:<a href="mailto:general-bounces@openid.net">general-bounces@openid.net</a>]
On<br>
> > Behalf Of Johnny Bufu<br>
> > Sent: Wednesday, July 09, 2008 10:52 PM<br>
> > To: Andrew Arnott<br>
> > Cc: OpenID List<br>
> > Subject: Re: [OpenID] Canonical OpenID url form<br>
> ><br>
> > For the record, since this continued in an offline thread:<br>
> ><br>
> > The issue is around the User-Supplied Identifiers. OpenID defines
them<br>
> > as a type of Identifiers, which in turn are defined as HTTP(S) URI or
XRIs.<br>
> > HTTP(S) URI do not allow non-ASCII characters.<br>
> ><br>
> > So, out of scope of OpenID, parties accepting IRIs (other than XRIs)<br>
> > should follow the respective authoritative recommendations (i.e.<br>
> > RFC3987) before presenting such strings to the OpenID layer as HTTP<br>
> > URIs, and convert them back to IRI form later on when they need to be<br>
> > displayed back to the users.<br>
> ><br>
> > Johnny<br>
> ><br>
> > On 08/07/08 10:32 PM, Andrew Arnott wrote:<br>
> > > Thanks, Johnny. I've had some conversations with a few
other people<br>
> > > who draw the opposite conclusion and believe that the %AB%CD
notation<br>
> > > is the canonical form.<br>
> > ><br>
> > > You make a good point about having to unescape the characters
from<br>
> > > the URI just above the transport layer, but I believe you're
applying<br>
> > > section 4.1 to the URL when it should only be applied to
the<br>
> > > key/value pairs. The OpenID ClaimedIdentifier, which by
the spec is<br>
> > > the last URL to respond without an HTTP redirect, cannot be in<br>
> > > unicode by the URI specification because unicode characters are
not<br>
> > > allowed, whether that is UTF8 or UTF16.<br>
> > ><br>
> > > Name/value pairs passed as part of a querystring may (and as the<br>
> > > section you quote requires) be encoded as UTF-8, but they are<br>
> > > subsequently URI encoded as %AB%CD hex characters (thus doubly<br>
> > > encoded) so they are actually no longer UTF-8 at the transport
layer.<br>
> > > Since the OpenID URL, around which all the identity of
OpenID is<br>
> > > focused (omitting XRIs which don't suffer from this problem) /is/
at<br>
> > > the transport layer of the way the security requirements force
the<br>
> > > claimed identifier to be discovered, is all about the transport<br>
> > > layer, I believe it would be a mistake to add semantics on top
of<br>
> > > that and call it canonical.<br>
> > ><br>
> > > What I also realized from some other conversations is that this<br>
> > > doesn't really matter. As long as an OP or RP is consistent
within<br>
> > > itself in storing and comparing Claimed Identifiers, whether it<br>
> > > stores and compares %AB%CD or the unicode equivalent character
won't<br>
> > > matter to anyone, since on the protocol/wire level it is always<br>
> > > %AB%CD. However, I think unescaping the URL and getting
the original<br>
> > > unicode characters back is very useful and should be done
for<br>
> > > purposes of displaying to the user.<br>
> > ><br>
> > > I think for the security and guaranteed identity of the
protocol,<br>
> > > there is a meaningful side to this though. It has not got
to do with<br>
> > > how the claimed identifier is stored, but rather how a
unicode<br>
> > > string is escaped for URI transport. A given unicode
string may be<br>
> > > represented by more than just one series of bytes. Unicode<br>
> > > characters exist that in UTF-8 or UTF-16 have multiple byte
sequences<br>
> > > /for the same character/. Therefore someone who is typing
in their<br>
> > > OpenID url to a site using one method during one visit, and then<br>
> > > types it in to the same site using a different method on a
subsequent<br>
> > > visit, will only be identified by the RP as the same
visitor if<br>
> > > OpenID requires that the RP transforms whatever unicode string
is<br>
> > > given by the user to the canonical byte form as defined by the<br>
> > > unicode standard before transit. For example, the letter
'Á' can be<br>
> > > encoded as a single character or using composition by adding an<br>
> > > accent to the A character. Both are legal, but the unicode
standard<br>
> > > defines one as canonical (I think). But if a string
containing this<br>
> > > character is not canonicalized first, then although the
character is<br>
> > > equivalent to the user and to unicode, the encoded %AB%CD string
will<br>
> > > be different, resulting in security problems for OpenID because<br>
> > > people could overload a single Identifier just by using
different<br>
> > > encodings at an OP, or fail to log into an RP depending on how
they<br>
> > > craft their string. By the way, I say 'unicode' in the strict
sense,<br>
> > > applying to UTF-8, UTF-16, etc. Unicode is commonly used
to refer to<br>
> > > just UTF-16, but this problem applies to all unicode character
sizes.<br>
> > ><br>
> > ><br>
> > ><br>
> > ><br>
> > > So I think OpenID should be more explicit about its unicode
support<br>
> > > for Identifiers, including mandating a canonical Unicode form.<br>
> > ><br>
> > > On Tue, Jul 8, 2008 at 9:41 PM, Johnny Bufu &lt;<a
href="mailto:johnny.bufu@gmail.com">johnny.bufu@gmail.com</a><br>
> > > &lt;mailto:<a href="mailto:johnny.bufu@gmail.com">johnny.bufu@gmail.com</a>&gt;&gt;
wrote:<br>
> > ><br>
> > ><br>
> > > On 08/07/08 03:01 PM, Andrew Arnott wrote:<br>
> > ><br>
> > > What is the canonical form of an OpenID URL? One with the %AB%CD
hex<br>
> > > encoding for unicode chars in the URL or with the actual unicode<br>
> > > chars? For the purposes of displaying to the user and storing in
the<br>
> > > RP's database.<br>
> > ><br>
> > > The spec doesn't seem to have anything to say on this.<br>
> > ><br>
> > ><br>
> > > I believe it does say:<br>
> > ><br>
> > > 4.1. Protocol Messages The OpenID Authentication protocol
messages<br>
> > > are mappings of plain-text keys to plain-text values. The keys
and<br>
> > > values permit the full Unicode character set (UCS). When the
keys and<br>
> > > values need to be converted to/from bytes, they MUST be
encoded<br>
> > > using UTF-8 [RFC3629].<br>
> > ><br>
> > > <a
href="http://openid.net/specs/openid-authentication-2_0.html#anchor4"
target="_blank">http://openid.net/specs/openid-authentication-2_0.html#anchor4</a><br>
> > ><br>
> > ><br>
> > > The reason I think it's not a simple automatic answer is the
unicode<br>
> > > chars may be what the user typed in and what exists on the
server,<br>
> > > but in transit, these characters are translated to %AB%CD in
order to<br>
> > > be validly escaped URI strings.<br>
> > ><br>
> > ><br>
> > > The receiving party must decode them to the original form when
they<br>
> > > are extracted from the transport layer.<br>
> > ><br>
> > ><br>
> > > So one could argue that the unicode characters are never part of
the<br>
> > > protocol<br>
> > ><br>
> > ><br>
> > > One would then be ignoring the parts of the protocol that do not
deal<br>
> > > with the transport layer directly.<br>
> > ><br>
> > ><br>
> > > Johnny<br>
> > ><br>
> > ><br>
> > > !DSPAM:139,48744d86221113907413095!<br>
> > _______________________________________________<br>
> > general mailing list<br>
> > <a href="mailto:general@openid.net">general@openid.net</a><br>
> > <a href="http://openid.net/mailman/listinfo/general" target="_blank">http://openid.net/mailman/listinfo/general</a><br>
><br>
> _______________________________________________<br>
> general mailing list<br>
> <a href="mailto:general@openid.net">general@openid.net</a><br>
> <a href="http://openid.net/mailman/listinfo/general" target="_blank">http://openid.net/mailman/listinfo/general</a><o:p></o:p></span></font></p>
</div>
</div>
</div>
<p class=MsoNormal><font size=3 face="Times New Roman"><span style='font-size:
12.0pt'><o:p> </o:p></span></font></p>
</div>
</div>
</body>
</html>