1 files changed, 65 insertions, 12 deletions
diff --git a/tools/merge-event-log-tags.py b/tools/merge-event-log-tags.py
index 2852612..c8f36fc 100755
--- a/tools/merge-event-log-tags.py
+++ b/tools/merge-event-log-tags.py
@@ -26,16 +26,22 @@ and fails if they do.
 
 import cStringIO
 import getopt
+import md5
+import struct
 import sys
 
 import event_log_tags
 
-by_tagnum = {}
 errors = []
 warnings = []
 
 output_file = None
 
+# Tags with a tag number of ? are assigned a tag in the range
+# [ASSIGN_START, ASSIGN_LIMIT).
+ASSIGN_START = 900000
+ASSIGN_LIMIT = 1000000
+
 try:
   opts, args = getopt.getopt(sys.argv[1:], "ho:")
 except getopt.GetoptError, err:
@@ -53,6 +59,18 @@ for o, a in opts:
     print >> sys.stderr, "unhandled option %s" % (o,)
     sys.exit(1)
 
+# Restrictions on tags:
+#
+#   Tag names must be unique.  (If the tag number and description are
+#   also the same, a warning is issued instead of an error.)
+#
+#   Explicit tag numbers must be unique.  (If the tag name is also the
+#   same, no error is issued because the above rule will issue a
+#   warning or error.)
+
+by_tagname = {}
+by_tagnum = {}
+
 for fn in args:
   tagfile = event_log_tags.TagFile(fn)
 
@@ -61,24 +79,37 @@ for fn in args:
     tagname = t.tagname
     description = t.description
 
-    if t.tagnum in by_tagnum:
-      orig = by_tagnum[t.tagnum]
+    if t.tagname in by_tagname:
+      orig = by_tagname[t.tagname]
 
-      if (t.tagname == orig.tagname and
+      if (t.tagnum == orig.tagnum and
           t.description == orig.description):
         # if the name and description are identical, issue a warning
         # instead of failing (to make it easier to move tags between
         # projects without breaking the build).
-        tagfile.AddWarning("tag %d \"%s\" duplicated in %s:%d" %
-                           (t.tagnum, t.tagname, orig.filename, orig.linenum),
+        tagfile.AddWarning("tag \"%s\" (%s) duplicated in %s:%d" %
+                           (t.tagname, t.tagnum, orig.filename, orig.linenum),
                            linenum=t.linenum)
       else:
-        tagfile.AddError("tag %d used by conflicting \"%s\" from %s:%d" %
-                         (t.tagnum, orig.tagname, orig.filename, orig.linenum),
-                         linenum=t.linenum)
+        tagfile.AddError(
+            "tag name \"%s\" used by conflicting tag %s from %s:%d" %
+            (t.tagname, orig.tagnum, orig.filename, orig.linenum),
+            linenum=t.linenum)
       continue
 
-    by_tagnum[t.tagnum] = t
+    if t.tagnum is not None and t.tagnum in by_tagnum:
+      orig = by_tagnum[t.tagnum]
+
+      if t.tagname != orig.tagname:
+        tagfile.AddError(
+            "tag number %d used by conflicting tag \"%s\" from %s:%d" %
+            (t.tagnum, orig.tagname, orig.filename, orig.linenum),
+            linenum=t.linenum)
+        continue
+
+    by_tagname[t.tagname] = t
+    if t.tagnum is not None:
+      by_tagnum[t.tagnum] = t
 
   errors.extend(tagfile.errors)
   warnings.extend(tagfile.warnings)
@@ -92,9 +123,31 @@ if warnings:
   for fn, ln, msg in warnings:
     print >> sys.stderr, "%s:%d: warning: %s" % (fn, ln, msg)
 
+# Python's hash function (a) isn't great and (b) varies between
+# versions of python.  Using md5 is overkill here but is the same from
+# platform to platform and speed shouldn't matter in practice.
+def hashname(str):
+  d = md5.md5(str).digest()[:4]
+  return struct.unpack("!I", d)[0]
+
+# Assign a tag number to all the entries that say they want one
+# assigned.  We do this based on a hash of the tag name so that the
+# numbers should stay relatively stable as tags are added.
+
+for name, t in sorted(by_tagname.iteritems()):
+  if t.tagnum is None:
+    while True:
+      x = (hashname(name) % (ASSIGN_LIMIT - ASSIGN_START)) + ASSIGN_START
+      if x not in by_tagnum:
+        t.tagnum = x
+        by_tagnum[x] = t
+        break
+      name = "_" + name
+
+# by_tagnum should be complete now; we've assigned numbers to all tags.
+
 buffer = cStringIO.StringIO()
-for n in sorted(by_tagnum):
-  t = by_tagnum[n]
+for n, t in sorted(by_tagnum.iteritems()):
   if t.description:
     buffer.write("%d %s %s\n" % (t.tagnum, t.tagname, t.description))
   else: