codereview: fetch metadata using JSON API, not XML scraping

Fixes hg clpatch.

R=golang-dev, r, r
CC=golang-dev
https://golang.org/cl/4524045
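
For illustration only (not part of the patch): the change reads issue metadata
from Rietveld's /api/<issue>?messages=true JSON endpoint via the new
JSONGet/MySend path below, instead of scraping the Atom/XML feeds. A rough
standalone approximation, assuming codereview.appspot.com as the server and
unauthenticated access (the extension itself routes requests through MySend,
which handles the configured server and authentication):

	import urllib2
	try:
		import json                     # standard library in Python 2.6+
	except ImportError:
		import simplejson as json       # earlier external package

	HOST = "https://codereview.appspot.com"  # placeholder server

	def fetch_issue(issue):
		# /api/<issue>?messages=true returns the owner, reviewers, cc,
		# patchset ids, and review messages as one JSON object.
		url = "%s/api/%s?messages=true" % (HOST, issue)
		return json.loads(urllib2.urlopen(url).read())

	d = fetch_issue("4524045")
	print d.get("owner_email"), d.get("reviewers", []), d.get("patchsets", [])

The real code additionally coerces the decoded unicode values to UTF-8 strings
(coerce_to_utf8) and downloads a patchset's diff from
/download/issue<issue>_<patchset>.diff, as in DownloadCL below.
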
diff --git a/lib/codereview/codereview.py b/lib/codereview/codereview.py
index 36d7df1..5fed5ef 100644
--- a/lib/codereview/codereview.py
+++ b/lib/codereview/codereview.py
@@ -45,10 +45,23 @@
 import subprocess
 import threading
 from HTMLParser import HTMLParser
+
+# The standard 'json' package is new in Python 2.6.
+# Before that it was an external package named simplejson.
 try:
-	from xml.etree import ElementTree as ET
-except:
-	from elementtree import ElementTree as ET
+	# Standard location in 2.6 and beyond.
+	import json
+except Exception, e:
+	try:
+		# Conventional name for earlier package.
+		import simplejson as json
+	except:
+		try:
+			# Was also bundled with django, which is commonly installed.
+			from django.utils import simplejson as json
+		except:
+			# We give up.
+			raise e
 
 try:
 	hgversion = util.version()
@@ -502,14 +515,16 @@
 	else:
 		cl = CL(name)
 	if web:
-		try:
-			f = GetSettings(name)
-		except:
-			return None, "cannot load CL %s from code review server: %s" % (name, ExceptionDetail())
-		if 'reviewers' not in f:
+		set_status("getting issue metadata from web")
+		d = JSONGet(ui, "/api/" + name + "?messages=true")
+		set_status(None)
+		if d is None:
+			return None, "cannot load CL %s from server" % (name,)
+		if 'owner_email' not in d or 'issue' not in d or str(d['issue']) != name:
 			return None, "malformed response loading CL data from code review server"
-		cl.reviewer = SplitCommaSpace(f['reviewers'])
-		cl.cc = SplitCommaSpace(f['cc'])
+		cl.dict = d
+		cl.reviewer = d.get('reviewers', [])
+		cl.cc = d.get('cc', [])
 		if cl.local and cl.copied_from and cl.desc:
 			# local copy of CL written by someone else
 			# and we saved a description.  use that one,
@@ -517,9 +532,10 @@
 			# before doing hg submit.
 			pass
 		else:
-			cl.desc = f['description']
+			cl.desc = d.get('description', "")
 		cl.url = server_url_base + name
 		cl.web = True
+		cl.private = d.get('private', False) != False
 	set_status("loaded CL " + name)
 	return cl, ''
 
@@ -1330,7 +1346,9 @@
 
 	# if version does not match the patch version,
 	# try to update the patch line numbers.
-	if id != vers:
+	if vers != "" and id != vers:
+		if vers not in repo:
+			return "local repository is out of date; sync to get %s" % (vers)
 		patch, err = portPatch(repo, patch, vers, id)
 		if err != "":
 			return "codereview issue %s is out of date: %s (%s->%s)" % (clname, err, vers, id)
@@ -2000,100 +2018,84 @@
 		if self.curdata is not None:
 			self.curdata += data
 
-# XML parser
-def XMLGet(ui, path):
+def JSONGet(ui, path):
 	try:
-		data = MySend(path, force_auth=False);
+		data = MySend(path, force_auth=False)
+		typecheck(data, str)
+		d = coerce_to_utf8(json.loads(data))
 	except:
-		ui.warn("XMLGet %s: %s\n" % (path, ExceptionDetail()))
+		ui.warn("JSONGet %s: %s\n" % (path, ExceptionDetail()))
 		return None
-	return ET.XML(data)
+	return d
+
+def coerce_to_utf8(x):
+	if type(x) in [str, int, float, bool, type(None)]:
+		pass
+	elif type(x) is unicode:
+		x = x.encode("utf-8")
+	elif type(x) is list:
+		for i in range(len(x)):
+			x[i] = coerce_to_utf8(x[i])
+	elif type(x) is dict:
+		for k in x:
+			x[k] = coerce_to_utf8(x[k])
+	else:
+		raise util.Abort("unknown type " + str(type(x)) + " in coerce_to_utf8")
+	if type(x) is str:
+		x = x.replace('\r\n', '\n')
+	return x
 
 def IsRietveldSubmitted(ui, clname, hex):
-	feed = XMLGet(ui, "/rss/issue/" + clname)
-	if feed is None:
+	dict = JSONGet(ui, "/api/" + clname + "?messages=true")
+	if dict is None:
 		return False
-	for sum in feed.findall("{http://www.w3.org/2005/Atom}entry/{http://www.w3.org/2005/Atom}summary"):
-		text = sum.text.strip()
+	for msg in dict.get("messages", []):
+		text = msg.get("text", "")
 		m = re.match('\*\*\* Submitted as [^*]*?([0-9a-f]+) \*\*\*', text)
 		if m is not None and len(m.group(1)) >= 8 and hex.startswith(m.group(1)):
 			return True
 	return False
 
-def IsRietveldMailed(ui, clname):
-	feed = XMLGet(ui, "/rss/issue/" + clname)
-	if feed is None:
-		return False
-	for sum in feed.findall("{http://www.w3.org/2005/Atom}entry/{http://www.w3.org/2005/Atom}summary"):
-		text = sum.text.strip()
-		if re.match("I'd like you to review this change", text):
+def IsRietveldMailed(cl):
+	for msg in cl.dict.get("messages", []):
+		if msg.get("text", "").find("I'd like you to review this change") >= 0:
 			return True
 	return False
 
 def DownloadCL(ui, repo, clname):
 	set_status("downloading CL " + clname)
-	cl, err = LoadCL(ui, repo, clname)
+	cl, err = LoadCL(ui, repo, clname, web=True)
 	if err != "":
 		return None, None, None, "error loading CL %s: %s" % (clname, err)
 
-	# Grab RSS feed to learn about CL
-	feed = XMLGet(ui, "/rss/issue/" + clname)
-	if feed is None:
-		return None, None, None, "cannot download CL"
-
 	# Find most recent diff
-	diff = None
-	prefix = 'http://' + server + '/'
+	diffs = cl.dict.get("patchsets", [])
+	if not diffs:
+		return None, None, None, "CL has no patch sets"
+	patchid = diffs[-1]
+
+	patchset = JSONGet(ui, "/api/" + clname + "/" + str(patchid))
+	if patchset is None:
+		return None, None, None, "error loading CL patchset %s/%d" % (clname, patchid)
+	if patchset.get("patchset", 0) != patchid:
+		return None, None, None, "malformed patchset information"
+	
 	vers = ""
-	for entry in feed.findall("{http://www.w3.org/2005/Atom}entry"):
-		thisVers = ""
-		for title in entry.findall("{http://www.w3.org/2005/Atom}title"):
-			m = re.search('diff -r ([0-9a-f]+) ', title.text)
-			if m:
-				thisVers = m.group(1)
-		if thisVers == "":
-			continue
-		for link in entry.findall("{http://www.w3.org/2005/Atom}link"):
-			if link.get('rel') != 'alternate':
-				continue
-			text = link.get('href')
-			if not text.startswith(prefix) or not text.endswith('.diff'):
-				continue
-			diff = text[len(prefix)-1:]
-			vers = thisVers
-	if diff is None:
-		return None, None, None, "CL has no diff"
+	msg = patchset.get("message", "").split()
+	if len(msg) >= 3 and msg[0] == "diff" and msg[1] == "-r":
+		vers = msg[2]
+	diff = "/download/issue" + clname + "_" + str(patchid) + ".diff"
+
 	diffdata = MySend(diff, force_auth=False)
-
-	# Find author - first entry will be author who created CL.
-	nick = None
-	for author in feed.findall("{http://www.w3.org/2005/Atom}entry/{http://www.w3.org/2005/Atom}author/{http://www.w3.org/2005/Atom}name"):
-		nick = author.text.strip()
-		break
-	if not nick:
-		return None, None, None, "CL has no author"
-
-	# The author is just a nickname: get the real email address.
-	try:
-		# want URL-encoded nick, but without a=, and rietveld rejects + for %20.
-		url = "/user_popup/" + urllib.urlencode({"a": nick})[2:].replace("+", "%20")
-		data = MySend(url, force_auth=False)
-	except:
-		ui.warn("error looking up %s: %s\n" % (nick, ExceptionDetail()))
-		cl.copied_from = nick+"@needtofix"
-		return cl, vers, diffdata, ""
-	match = re.match(r"<b>(.*) \((.*)\)</b>", data)
-	if not match:
-		return None, None, "error looking up %s: cannot parse result %s" % (nick, repr(data))
-	if match.group(1) != nick and match.group(2) != nick:
-		return None, None, "error looking up %s: got info for %s, %s" % (nick, match.group(1), match.group(2))
-	email = match.group(1)
-
+	
 	# Print warning if email is not in CONTRIBUTORS file.
+	email = cl.dict.get("owner_email", "")
+	if not email:
+		return None, None, None, "cannot find owner for %s" % (clname)
 	him = FindContributor(ui, repo, email)
 	me = FindContributor(ui, repo, None)
 	if him == me:
-		cl.mailed = IsRietveldMailed(ui, clname)
+		cl.mailed = IsRietveldMailed(cl)
 	else:
 		cl.copied_from = email
 
@@ -2192,25 +2194,6 @@
 		m[k.encode("utf-8")] = v.replace("\r\n", "\n").encode("utf-8")
 	return m
 
-# Fetch the settings for the CL, like reviewer and CC list, by
-# scraping the Rietveld editing forms.
-def GetSettings(issue):
-	set_status("getting issue metadata from web")
-	# The /issue/edit page has everything but only the
-	# CL owner is allowed to fetch it (and submit it).
-	f = None
-	try:
-		f = GetForm("/" + issue + "/edit")
-	except:
-		pass
-	if not f or 'reviewers' not in f:
-		# Maybe we're not the CL owner.  Fall back to the
-		# /publish page, which has the reviewer and CC lists,
-		# and then fetch the description separately.
-		f = GetForm("/" + issue + "/publish")
-		f['description'] = MySend("/"+issue+"/description", force_auth=False)
-	return f
-
 def EditDesc(issue, subject=None, desc=None, reviewers=None, cc=None, closed=False, private=False):
 	set_status("uploading change to description")
 	form_fields = GetForm("/" + issue + "/edit")