arch/tile: core support for Tilera 32-bit chips.

This change is the core kernel support for TILEPro and TILE64 chips. No driver support (except the console driver) is included yet. This includes the relevant Linux headers in asm/; the low-level low-level "Tile architecture" headers in arch/, which are shared with the hypervisor, etc., and are build-system agnostic; and the relevant hypervisor headers in hv/. Signed-off-by: Chris Metcalf <cmetcalf@tilera.com> Acked-by: Arnd Bergmann <arnd@arndb.de> Acked-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp> Reviewed-by: Paul Mundt <lethal@linux-sh.org>
author: Chris Metcalf <cmetcalf@tilera.com> 2010-05-28 23:09:12 -0400
committer: Chris Metcalf <cmetcalf@tilera.com> 2010-06-04 17:11:18 -0400
commit: 867e359b97c970a60626d5d76bbe2a8fadbf38fb (patch)
tree: c5ccbb7f5172e8555977119608ecb1eee3cc37e3 /arch/tile
parent: 5360bd776f73d0a7da571d72a09a03f237e99900 (diff)
download: kernel_samsung_aries-867e359b97c970a60626d5d76bbe2a8fadbf38fb.zip
kernel_samsung_aries-867e359b97c970a60626d5d76bbe2a8fadbf38fb.tar.gz
kernel_samsung_aries-867e359b97c970a60626d5d76bbe2a8fadbf38fb.tar.bz2
199 files changed, 49495 insertions, 0 deletions
diff --git a/arch/tile/Kbuild b/arch/tile/Kbuild
new file mode 100644
index 0000000..a9b9227
--- /dev/null
+++ b/arch/tile/Kbuild
@@ -0,0 +1,3 @@
+
+obj-y += kernel/
+obj-y += mm/
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
new file mode 100644
index 0000000..290ef41
--- /dev/null
+++ b/arch/tile/Kconfig
@@ -0,0 +1,352 @@
+# For a description of the syntax of this configuration file,
+# see Documentation/kbuild/config-language.txt.
+
+config MMU
+	def_bool y
+
+config GENERIC_CSUM
+	def_bool y
+
+config GENERIC_HARDIRQS
+	def_bool y
+
+config GENERIC_HARDIRQS_NO__DO_IRQ
+	def_bool y
+
+config GENERIC_IRQ_PROBE
+	def_bool y
+
+config GENERIC_PENDING_IRQ
+	def_bool y
+	depends on GENERIC_HARDIRQS && SMP
+
+config SEMAPHORE_SLEEPERS
+	def_bool y
+
+config HAVE_ARCH_ALLOC_REMAP
+	def_bool y
+
+config HAVE_SETUP_PER_CPU_AREA
+	def_bool y
+
+config NEED_PER_CPU_PAGE_FIRST_CHUNK
+        def_bool y
+
+config SYS_SUPPORTS_HUGETLBFS
+	def_bool y
+
+config GENERIC_TIME
+	def_bool y
+
+config GENERIC_CLOCKEVENTS
+	def_bool y
+
+# FIXME: tilegx can implement a more efficent rwsem.
+config RWSEM_GENERIC_SPINLOCK
+	def_bool y
+
+# We have a very flat architecture from a migration point of view,
+# so save boot time by presetting this (particularly useful on tile-sim).
+config DEFAULT_MIGRATION_COST
+	int
+	default "10000000"
+
+# We only support gcc 4.4 and above, so this should work.
+config ARCH_SUPPORTS_OPTIMIZED_INLINING
+	def_bool y
+
+config ARCH_PHYS_ADDR_T_64BIT
+	def_bool y
+
+config LOCKDEP_SUPPORT
+	def_bool y
+
+config STACKTRACE_SUPPORT
+	def_bool y
+	select STACKTRACE
+
+# We use discontigmem for now; at some point we may want to switch
+# to sparsemem (Tilera bug 7996).
+config ARCH_DISCONTIGMEM_ENABLE
+	def_bool y
+
+config ARCH_DISCONTIGMEM_DEFAULT
+	def_bool y
+
+config TRACE_IRQFLAGS_SUPPORT
+	def_bool y
+
+config STRICT_DEVMEM
+	def_bool y
+
+# SMP is required for Tilera Linux.
+config SMP
+	def_bool y
+
+# Allow checking for compile-time determined overflow errors in
+# copy_from_user().  There are still unprovable places in the
+# generic code as of 2.6.34, so this option is not really compatible
+# with -Werror, which is more useful in general.
+config DEBUG_COPY_FROM_USER
+	def_bool n
+
+config HVC_TILE
+	select HVC_DRIVER
+	def_bool y
+
+config TILE
+	def_bool y
+	select GENERIC_FIND_FIRST_BIT
+	select GENERIC_FIND_NEXT_BIT
+	select USE_GENERIC_SMP_HELPERS
+	select CC_OPTIMIZE_FOR_SIZE
+
+# FIXME: investigate whether we need/want these options.
+#	select HAVE_IOREMAP_PROT
+#       select HAVE_OPTPROBES
+#       select HAVE_REGS_AND_STACK_ACCESS_API
+#       select HAVE_HW_BREAKPOINT
+#       select PERF_EVENTS
+#       select HAVE_USER_RETURN_NOTIFIER
+#       config NO_BOOTMEM
+#       config ARCH_SUPPORTS_DEBUG_PAGEALLOC
+#       config HUGETLB_PAGE_SIZE_VARIABLE
+
+
+mainmenu "Linux/TILE Kernel Configuration"
+
+# Please note: TILE-Gx support is not yet finalized; this is
+# the preliminary support.  TILE-Gx drivers are only provided
+# with the alpha or beta test versions for Tilera customers.
+config TILEGX
+	depends on EXPERIMENTAL
+	bool "Building with TILE-Gx (64-bit) compiler and toolchain"
+
+config 64BIT
+	depends on TILEGX
+	def_bool y
+
+config ARCH_DEFCONFIG
+	string
+	default "arch/tile/configs/tile_defconfig" if !TILEGX
+	default "arch/tile/configs/tilegx_defconfig" if TILEGX
+
+source "init/Kconfig"
+
+menu "Tilera-specific configuration"
+
+config NR_CPUS
+	int "Maximum number of tiles (2-255)"
+	range 2 255
+	depends on SMP
+	default "64"
+	---help---
+	  Building with 64 is the recommended value, but a slightly
+	  smaller kernel memory footprint results from using a smaller
+	  value on chips with fewer tiles.
+
+source "kernel/time/Kconfig"
+
+source "kernel/Kconfig.hz"
+
+config KEXEC
+	bool "kexec system call"
+	---help---
+	  kexec is a system call that implements the ability to shutdown your
+	  current kernel, and to start another kernel.  It is like a reboot
+	  but it is independent of the system firmware.   It is used
+	  to implement the "mboot" Tilera booter.
+
+	  The name comes from the similarity to the exec system call.
+
+config COMPAT
+	bool "Support 32-bit TILE-Gx binaries in addition to 64-bit"
+	depends on TILEGX
+	select COMPAT_BINFMT_ELF
+	default y
+	---help---
+	  If enabled, the kernel will support running TILE-Gx binaries
+	  that were built with the -m32 option.
+
+config SYSVIPC_COMPAT
+	def_bool y
+	depends on COMPAT && SYSVIPC
+
+# We do not currently support disabling HIGHMEM on tile64 and tilepro.
+config HIGHMEM
+	bool # "Support for more than 512 MB of RAM"
+	default !TILEGX
+	---help---
+	  Linux can use the full amount of RAM in the system by
+	  default.  However, the address space of TILE processors is
+	  only 4 Gigabytes large. That means that, if you have a large
+	  amount of physical memory, not all of it can be "permanently
+	  mapped" by the kernel. The physical memory that's not
+	  permanently mapped is called "high memory".
+
+	  If you are compiling a kernel which will never run on a
+	  machine with more than 512 MB total physical RAM, answer
+	  "false" here. This will result in the kernel mapping all of
+	  physical memory into the top 1 GB of virtual memory space.
+
+	  If unsure, say "true".
+
+# We do not currently support disabling NUMA.
+config NUMA
+	bool # "NUMA Memory Allocation and Scheduler Support"
+	depends on SMP && DISCONTIGMEM
+	default y
+	---help---
+	  NUMA memory allocation is required for TILE processors
+	  unless booting with memory striping enabled in the
+	  hypervisor, or with only a single memory controller.
+	  It is recommended that this option always be enabled.
+
+config NODES_SHIFT
+	int "Log base 2 of the max number of memory controllers"
+	default 2
+	depends on NEED_MULTIPLE_NODES
+	---help---
+	  By default, 2, i.e. 2^2 == 4 DDR2 controllers.
+	  In a system with more controllers, this value should be raised.
+
+# Need 16MB areas to enable hugetlb
+# See build-time check in arch/tile/mm/init.c.
+config FORCE_MAX_ZONEORDER
+	int
+	default 9
+
+choice
+	depends on !TILEGX
+	prompt "Memory split" if EMBEDDED
+	default VMSPLIT_3G
+	---help---
+	  Select the desired split between kernel and user memory.
+
+	  If the address range available to the kernel is less than the
+	  physical memory installed, the remaining memory will be available
+	  as "high memory". Accessing high memory is a little more costly
+	  than low memory, as it needs to be mapped into the kernel first.
+	  Note that increasing the kernel address space limits the range
+	  available to user programs, making the address space there
+	  tighter.  Selecting anything other than the default 3G/1G split
+	  will also likely make your kernel incompatible with binary-only
+	  kernel modules.
+
+	  If you are not absolutely sure what you are doing, leave this
+	  option alone!
+
+	config VMSPLIT_375G
+		bool "3.75G/0.25G user/kernel split (no kernel networking)"
+	config VMSPLIT_35G
+		bool "3.5G/0.5G user/kernel split"
+	config VMSPLIT_3G
+		bool "3G/1G user/kernel split"
+	config VMSPLIT_3G_OPT
+		bool "3G/1G user/kernel split (for full 1G low memory)"
+	config VMSPLIT_2G
+		bool "2G/2G user/kernel split"
+	config VMSPLIT_1G
+		bool "1G/3G user/kernel split"
+endchoice
+
+config PAGE_OFFSET
+	hex
+	default 0xF0000000 if VMSPLIT_375G
+	default 0xE0000000 if VMSPLIT_35G
+	default 0xB0000000 if VMSPLIT_3G_OPT
+	default 0x80000000 if VMSPLIT_2G
+	default 0x40000000 if VMSPLIT_1G
+	default 0xC0000000
+
+source "mm/Kconfig"
+
+config CMDLINE_BOOL
+	bool "Built-in kernel command line"
+	default n
+	---help---
+	  Allow for specifying boot arguments to the kernel at
+	  build time.  On some systems (e.g. embedded ones), it is
+	  necessary or convenient to provide some or all of the
+	  kernel boot arguments with the kernel itself (that is,
+	  to not rely on the boot loader to provide them.)
+
+	  To compile command line arguments into the kernel,
+	  set this option to 'Y', then fill in the
+	  the boot arguments in CONFIG_CMDLINE.
+
+	  Systems with fully functional boot loaders (e.g. mboot, or
+	  if booting over PCI) should leave this option set to 'N'.
+
+config CMDLINE
+	string "Built-in kernel command string"
+	depends on CMDLINE_BOOL
+	default ""
+	---help---
+	  Enter arguments here that should be compiled into the kernel
+	  image and used at boot time.  If the boot loader provides a
+	  command line at boot time, it is appended to this string to
+	  form the full kernel command line, when the system boots.
+
+	  However, you can use the CONFIG_CMDLINE_OVERRIDE option to
+	  change this behavior.
+
+	  In most cases, the command line (whether built-in or provided
+	  by the boot loader) should specify the device for the root
+	  file system.
+
+config CMDLINE_OVERRIDE
+	bool "Built-in command line overrides boot loader arguments"
+	default n
+	depends on CMDLINE_BOOL
+	---help---
+	  Set this option to 'Y' to have the kernel ignore the boot loader
+	  command line, and use ONLY the built-in command line.
+
+	  This is used to work around broken boot loaders.  This should
+	  be set to 'N' under normal conditions.
+
+config VMALLOC_RESERVE
+	hex
+	default 0x1000000
+
+endmenu  # Tilera-specific configuration
+
+menu "Bus options"
+
+config NO_IOMEM
+	def_bool !PCI
+
+config NO_IOPORT
+	def_bool !PCI
+
+source "drivers/pci/Kconfig"
+
+source "drivers/pci/hotplug/Kconfig"
+
+endmenu
+
+menu "Executable file formats"
+
+# only elf supported
+config KCORE_ELF
+	def_bool y
+	depends on PROC_FS
+
+source "fs/Kconfig.binfmt"
+
+endmenu
+
+source "net/Kconfig"
+
+source "drivers/Kconfig"
+
+source "fs/Kconfig"
+
+source "arch/tile/Kconfig.debug"
+
+source "security/Kconfig"
+
+source "crypto/Kconfig"
+
+source "lib/Kconfig"
diff --git a/arch/tile/Kconfig.debug b/arch/tile/Kconfig.debug
new file mode 100644
index 0000000..a81f0fb
--- /dev/null
+++ b/arch/tile/Kconfig.debug
@@ -0,0 +1,43 @@
+menu "Kernel hacking"
+
+source "lib/Kconfig.debug"
+
+config EARLY_PRINTK
+	bool "Early printk" if EMBEDDED && DEBUG_KERNEL
+	default y
+	help
+	  Write kernel log output directly via the hypervisor console.
+
+	  This is useful for kernel debugging when your machine crashes very
+	  early before the console code is initialized. For normal operation
+	  it is not recommended because it looks ugly and doesn't cooperate
+	  with klogd/syslogd. You should normally N here,
+	  unless you want to debug such a crash.
+
+config DEBUG_STACKOVERFLOW
+	bool "Check for stack overflows"
+	depends on DEBUG_KERNEL
+	help
+	  This option will cause messages to be printed if free stack space
+	  drops below a certain limit.
+
+config DEBUG_STACK_USAGE
+	bool "Stack utilization instrumentation"
+	depends on DEBUG_KERNEL
+	help
+	  Enables the display of the minimum amount of free stack which each
+	  task has ever had available in the sysrq-T and sysrq-P debug output.
+
+	  This option will slow down process creation somewhat.
+
+config DEBUG_EXTRA_FLAGS
+	string "Additional compiler arguments when building with '-g'"
+	depends on DEBUG_INFO
+	default ""
+	help
+	  Debug info can be large, and flags like
+	  `-femit-struct-debug-baseonly' can reduce the kernel file
+	  size and build time noticeably.  Such flags are often
+	  helpful if the main use of debug info is line number info.
+
+endmenu
diff --git a/arch/tile/Makefile b/arch/tile/Makefile
new file mode 100644
index 0000000..07c4318
--- /dev/null
+++ b/arch/tile/Makefile
@@ -0,0 +1,52 @@
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# This file is included by the global makefile so that you can add your own
+# architecture-specific flags and dependencies. Remember to do have actions
+# for "archclean" and "archdep" for cleaning up and making dependencies for
+# this architecture
+
+ifeq ($(CROSS_COMPILE),)
+# If building with TILERA_ROOT set (i.e. using the Tilera Multicore
+# Development Environment) we can set CROSS_COMPILE based on that.
+ifdef TILERA_ROOT
+CROSS_COMPILE	= $(TILERA_ROOT)/bin/tile-
+endif
+endif
+
+# If we're not cross-compiling, make sure we're on the right architecture.
+ifeq ($(CROSS_COMPILE),)
+HOST_ARCH = $(shell uname -m)
+ifneq ($(HOST_ARCH),$(ARCH))
+$(error Set TILERA_ROOT or CROSS_COMPILE when building $(ARCH) on $(HOST_ARCH))
+endif
+endif
+
+
+KBUILD_CFLAGS   += $(CONFIG_DEBUG_EXTRA_FLAGS)
+
+LIBGCC_PATH     := $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name)
+
+# Provide the path to use for "make defconfig".
+KBUILD_DEFCONFIG := $(ARCH)_defconfig
+
+# Used as a file extension when useful, e.g. head_$(BITS).o
+# Not needed for (e.g.) "$(CC) -m32" since the compiler automatically
+# uses the right default anyway.
+export BITS
+ifeq ($(CONFIG_TILEGX),y)
+BITS := 64
+else
+BITS := 32
+endif
+
+head-y		:= arch/tile/kernel/head_$(BITS).o
+
+libs-y		+= arch/tile/lib/
+libs-y		+= $(LIBGCC_PATH)
+
+
+# See arch/tile/Kbuild for content of core part of the kernel
+core-y		+= arch/tile/
diff --git a/arch/tile/configs/tile_defconfig b/arch/tile/configs/tile_defconfig
new file mode 100644
index 0000000..74a5be3
--- /dev/null
+++ b/arch/tile/configs/tile_defconfig
@@ -0,0 +1,1289 @@
+#
+# Automatically generated make config: don't edit
+# Linux kernel version: 2.6.34
+# Fri May 28 17:51:43 2010
+#
+CONFIG_MMU=y
+CONFIG_GENERIC_CSUM=y
+CONFIG_GENERIC_HARDIRQS=y
+CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y
+CONFIG_GENERIC_IRQ_PROBE=y
+CONFIG_GENERIC_PENDING_IRQ=y
+CONFIG_ZONE_DMA=y
+CONFIG_SEMAPHORE_SLEEPERS=y
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+CONFIG_HAVE_ARCH_ALLOC_REMAP=y
+CONFIG_HAVE_SETUP_PER_CPU_AREA=y
+CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK=y
+CONFIG_SYS_SUPPORTS_HUGETLBFS=y
+CONFIG_GENERIC_TIME=y
+CONFIG_GENERIC_CLOCKEVENTS=y
+CONFIG_CLOCKSOURCE_WATCHDOG=y
+CONFIG_RWSEM_GENERIC_SPINLOCK=y
+CONFIG_DEFAULT_MIGRATION_COST=10000000
+CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING=y
+CONFIG_ARCH_PHYS_ADDR_T_64BIT=y
+CONFIG_LOCKDEP_SUPPORT=y
+CONFIG_STACKTRACE_SUPPORT=y
+CONFIG_ARCH_DISCONTIGMEM_ENABLE=y
+CONFIG_ARCH_DISCONTIGMEM_DEFAULT=y
+CONFIG_TRACE_IRQFLAGS_SUPPORT=y
+CONFIG_STRICT_DEVMEM=y
+CONFIG_SMP=y
+CONFIG_WERROR=y
+# CONFIG_DEBUG_COPY_FROM_USER is not set
+CONFIG_SERIAL_CONSOLE=y
+CONFIG_HVC_TILE=y
+CONFIG_TILE=y
+# CONFIG_TILEGX is not set
+CONFIG_ARCH_DEFCONFIG="arch/tile/configs/tile_defconfig"
+CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
+CONFIG_CONSTRUCTORS=y
+
+#
+# General setup
+#
+CONFIG_EXPERIMENTAL=y
+CONFIG_LOCK_KERNEL=y
+CONFIG_INIT_ENV_ARG_LIMIT=32
+CONFIG_LOCALVERSION=""
+CONFIG_LOCALVERSION_AUTO=y
+# CONFIG_SWAP is not set
+CONFIG_SYSVIPC=y
+CONFIG_SYSVIPC_SYSCTL=y
+# CONFIG_POSIX_MQUEUE is not set
+# CONFIG_BSD_PROCESS_ACCT is not set
+# CONFIG_TASKSTATS is not set
+# CONFIG_AUDIT is not set
+
+#
+# RCU Subsystem
+#
+CONFIG_TREE_RCU=y
+# CONFIG_TREE_PREEMPT_RCU is not set
+# CONFIG_TINY_RCU is not set
+# CONFIG_RCU_TRACE is not set
+CONFIG_RCU_FANOUT=32
+# CONFIG_RCU_FANOUT_EXACT is not set
+# CONFIG_RCU_FAST_NO_HZ is not set
+# CONFIG_TREE_RCU_TRACE is not set
+# CONFIG_IKCONFIG is not set
+CONFIG_LOG_BUF_SHIFT=17
+# CONFIG_CGROUPS is not set
+# CONFIG_SYSFS_DEPRECATED_V2 is not set
+# CONFIG_RELAY is not set
+# CONFIG_NAMESPACES is not set
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_INITRAMFS_SOURCE="usr/contents.txt"
+CONFIG_INITRAMFS_ROOT_UID=0
+CONFIG_INITRAMFS_ROOT_GID=0
+CONFIG_RD_GZIP=y
+# CONFIG_RD_BZIP2 is not set
+# CONFIG_RD_LZMA is not set
+# CONFIG_RD_LZO is not set
+CONFIG_INITRAMFS_COMPRESSION_NONE=y
+# CONFIG_INITRAMFS_COMPRESSION_GZIP is not set
+# CONFIG_INITRAMFS_COMPRESSION_BZIP2 is not set
+# CONFIG_INITRAMFS_COMPRESSION_LZMA is not set
+# CONFIG_INITRAMFS_COMPRESSION_LZO is not set
+CONFIG_SYSCTL=y
+CONFIG_ANON_INODES=y
+CONFIG_EMBEDDED=y
+CONFIG_SYSCTL_SYSCALL=y
+CONFIG_KALLSYMS=y
+# CONFIG_KALLSYMS_ALL is not set
+# CONFIG_KALLSYMS_EXTRA_PASS is not set
+CONFIG_HOTPLUG=y
+CONFIG_PRINTK=y
+CONFIG_BUG=y
+CONFIG_ELF_CORE=y
+CONFIG_BASE_FULL=y
+CONFIG_FUTEX=y
+CONFIG_EPOLL=y
+CONFIG_SIGNALFD=y
+CONFIG_TIMERFD=y
+CONFIG_EVENTFD=y
+CONFIG_SHMEM=y
+CONFIG_AIO=y
+
+#
+# Kernel Performance Events And Counters
+#
+CONFIG_VM_EVENT_COUNTERS=y
+CONFIG_PCI_QUIRKS=y
+CONFIG_SLUB_DEBUG=y
+# CONFIG_COMPAT_BRK is not set
+# CONFIG_SLAB is not set
+CONFIG_SLUB=y
+# CONFIG_SLOB is not set
+CONFIG_PROFILING=y
+CONFIG_OPROFILE=y
+CONFIG_HAVE_OPROFILE=y
+CONFIG_USE_GENERIC_SMP_HELPERS=y
+
+#
+# GCOV-based kernel profiling
+#
+# CONFIG_SLOW_WORK is not set
+# CONFIG_HAVE_GENERIC_DMA_COHERENT is not set
+CONFIG_SLABINFO=y
+CONFIG_RT_MUTEXES=y
+CONFIG_BASE_SMALL=0
+CONFIG_MODULES=y
+# CONFIG_MODULE_FORCE_LOAD is not set
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_MODULE_FORCE_UNLOAD is not set
+# CONFIG_MODVERSIONS is not set
+# CONFIG_MODULE_SRCVERSION_ALL is not set
+CONFIG_STOP_MACHINE=y
+CONFIG_BLOCK=y
+CONFIG_LBDAF=y
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_BLK_DEV_INTEGRITY is not set
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+# CONFIG_IOSCHED_DEADLINE is not set
+# CONFIG_IOSCHED_CFQ is not set
+# CONFIG_DEFAULT_DEADLINE is not set
+# CONFIG_DEFAULT_CFQ is not set
+CONFIG_DEFAULT_NOOP=y
+CONFIG_DEFAULT_IOSCHED="noop"
+# CONFIG_INLINE_SPIN_TRYLOCK is not set
+# CONFIG_INLINE_SPIN_TRYLOCK_BH is not set
+# CONFIG_INLINE_SPIN_LOCK is not set
+# CONFIG_INLINE_SPIN_LOCK_BH is not set
+# CONFIG_INLINE_SPIN_LOCK_IRQ is not set
+# CONFIG_INLINE_SPIN_LOCK_IRQSAVE is not set
+CONFIG_INLINE_SPIN_UNLOCK=y
+# CONFIG_INLINE_SPIN_UNLOCK_BH is not set
+CONFIG_INLINE_SPIN_UNLOCK_IRQ=y
+# CONFIG_INLINE_SPIN_UNLOCK_IRQRESTORE is not set
+# CONFIG_INLINE_READ_TRYLOCK is not set
+# CONFIG_INLINE_READ_LOCK is not set
+# CONFIG_INLINE_READ_LOCK_BH is not set
+# CONFIG_INLINE_READ_LOCK_IRQ is not set
+# CONFIG_INLINE_READ_LOCK_IRQSAVE is not set
+CONFIG_INLINE_READ_UNLOCK=y
+# CONFIG_INLINE_READ_UNLOCK_BH is not set
+CONFIG_INLINE_READ_UNLOCK_IRQ=y
+# CONFIG_INLINE_READ_UNLOCK_IRQRESTORE is not set
+# CONFIG_INLINE_WRITE_TRYLOCK is not set
+# CONFIG_INLINE_WRITE_LOCK is not set
+# CONFIG_INLINE_WRITE_LOCK_BH is not set
+# CONFIG_INLINE_WRITE_LOCK_IRQ is not set
+# CONFIG_INLINE_WRITE_LOCK_IRQSAVE is not set
+CONFIG_INLINE_WRITE_UNLOCK=y
+# CONFIG_INLINE_WRITE_UNLOCK_BH is not set
+CONFIG_INLINE_WRITE_UNLOCK_IRQ=y
+# CONFIG_INLINE_WRITE_UNLOCK_IRQRESTORE is not set
+CONFIG_MUTEX_SPIN_ON_OWNER=y
+
+#
+# Tilera-specific configuration
+#
+CONFIG_NR_CPUS=64
+CONFIG_HOMECACHE=y
+CONFIG_DATAPLANE=y
+CONFIG_TICK_ONESHOT=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
+CONFIG_HZ_100=y
+# CONFIG_HZ_250 is not set
+# CONFIG_HZ_300 is not set
+# CONFIG_HZ_1000 is not set
+CONFIG_HZ=100
+CONFIG_SCHED_HRTICK=y
+# CONFIG_KEXEC is not set
+CONFIG_HIGHMEM=y
+CONFIG_NUMA=y
+CONFIG_NODES_SHIFT=2
+CONFIG_FORCE_MAX_ZONEORDER=9
+# CONFIG_VMSPLIT_375G is not set
+# CONFIG_VMSPLIT_35G is not set
+CONFIG_VMSPLIT_3G=y
+# CONFIG_VMSPLIT_3G_OPT is not set
+# CONFIG_VMSPLIT_2G is not set
+# CONFIG_VMSPLIT_1G is not set
+CONFIG_PAGE_OFFSET=0xC0000000
+CONFIG_SELECT_MEMORY_MODEL=y
+# CONFIG_FLATMEM_MANUAL is not set
+CONFIG_DISCONTIGMEM_MANUAL=y
+# CONFIG_SPARSEMEM_MANUAL is not set
+CONFIG_DISCONTIGMEM=y
+CONFIG_FLAT_NODE_MEM_MAP=y
+CONFIG_NEED_MULTIPLE_NODES=y
+CONFIG_PAGEFLAGS_EXTENDED=y
+CONFIG_SPLIT_PTLOCK_CPUS=4
+CONFIG_MIGRATION=y
+CONFIG_PHYS_ADDR_T_64BIT=y
+CONFIG_ZONE_DMA_FLAG=1
+CONFIG_BOUNCE=y
+CONFIG_VIRT_TO_BUS=y
+# CONFIG_KSM is not set
+CONFIG_DEFAULT_MMAP_MIN_ADDR=4096
+# CONFIG_CMDLINE_BOOL is not set
+# CONFIG_FEEDBACK_COLLECT is not set
+CONFIG_FEEDBACK_USE=""
+# CONFIG_HUGEVMAP is not set
+CONFIG_VMALLOC_RESERVE=0x1000000
+CONFIG_HARDWALL=y
+CONFIG_MEMPROF=y
+CONFIG_XGBE_MAIN=y
+CONFIG_NET_TILE=y
+CONFIG_PSEUDO_NAPI=y
+CONFIG_TILEPCI_ENDP=y
+CONFIG_TILE_IDE_GPIO=y
+CONFIG_TILE_SOFTUART=y
+
+#
+# Bus options
+#
+CONFIG_PCI=y
+CONFIG_PCI_DOMAINS=y
+# CONFIG_ARCH_SUPPORTS_MSI is not set
+CONFIG_PCI_DEBUG=y
+# CONFIG_PCI_STUB is not set
+# CONFIG_PCI_IOV is not set
+# CONFIG_HOTPLUG_PCI is not set
+
+#
+# Executable file formats
+#
+CONFIG_KCORE_ELF=y
+CONFIG_BINFMT_ELF=y
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+# CONFIG_HAVE_AOUT is not set
+# CONFIG_BINFMT_MISC is not set
+CONFIG_NET=y
+
+#
+# Networking options
+#
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_XFRM=y
+# CONFIG_XFRM_USER is not set
+# CONFIG_XFRM_SUB_POLICY is not set
+# CONFIG_XFRM_MIGRATE is not set
+# CONFIG_XFRM_STATISTICS is not set
+# CONFIG_NET_KEY is not set
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+# CONFIG_IP_ADVANCED_ROUTER is not set
+CONFIG_IP_FIB_HASH=y
+# CONFIG_IP_PNP is not set
+# CONFIG_NET_IPIP is not set
+# CONFIG_NET_IPGRE is not set
+# CONFIG_IP_MROUTE is not set
+# CONFIG_ARPD is not set
+# CONFIG_SYN_COOKIES is not set
+# CONFIG_INET_AH is not set
+# CONFIG_INET_ESP is not set
+# CONFIG_INET_IPCOMP is not set
+# CONFIG_INET_XFRM_TUNNEL is not set
+CONFIG_INET_TUNNEL=y
+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET_XFRM_MODE_TUNNEL is not set
+CONFIG_INET_XFRM_MODE_BEET=y
+# CONFIG_INET_LRO is not set
+# CONFIG_INET_DIAG is not set
+# CONFIG_TCP_CONG_ADVANCED is not set
+CONFIG_TCP_CONG_CUBIC=y
+CONFIG_DEFAULT_TCP_CONG="cubic"
+# CONFIG_TCP_MD5SIG is not set
+CONFIG_IPV6=y
+# CONFIG_IPV6_PRIVACY is not set
+# CONFIG_IPV6_ROUTER_PREF is not set
+# CONFIG_IPV6_OPTIMISTIC_DAD is not set
+# CONFIG_INET6_AH is not set
+# CONFIG_INET6_ESP is not set
+# CONFIG_INET6_IPCOMP is not set
+# CONFIG_IPV6_MIP6 is not set
+# CONFIG_INET6_XFRM_TUNNEL is not set
+# CONFIG_INET6_TUNNEL is not set
+CONFIG_INET6_XFRM_MODE_TRANSPORT=y
+CONFIG_INET6_XFRM_MODE_TUNNEL=y
+CONFIG_INET6_XFRM_MODE_BEET=y
+# CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set
+CONFIG_IPV6_SIT=y
+# CONFIG_IPV6_SIT_6RD is not set
+CONFIG_IPV6_NDISC_NODETYPE=y
+# CONFIG_IPV6_TUNNEL is not set
+# CONFIG_IPV6_MULTIPLE_TABLES is not set
+# CONFIG_IPV6_MROUTE is not set
+# CONFIG_NETWORK_SECMARK is not set
+# CONFIG_NETFILTER is not set
+# CONFIG_IP_DCCP is not set
+# CONFIG_IP_SCTP is not set
+# CONFIG_RDS is not set
+# CONFIG_TIPC is not set
+# CONFIG_ATM is not set
+# CONFIG_BRIDGE is not set
+# CONFIG_NET_DSA is not set
+# CONFIG_VLAN_8021Q is not set
+# CONFIG_DECNET is not set
+# CONFIG_LLC2 is not set
+# CONFIG_IPX is not set
+# CONFIG_ATALK is not set
+# CONFIG_X25 is not set
+# CONFIG_LAPB is not set
+# CONFIG_ECONET is not set
+# CONFIG_WAN_ROUTER is not set
+# CONFIG_PHONET is not set
+# CONFIG_IEEE802154 is not set
+# CONFIG_NET_SCHED is not set
+# CONFIG_DCB is not set
+
+#
+# Network testing
+#
+# CONFIG_NET_PKTGEN is not set
+# CONFIG_HAMRADIO is not set
+# CONFIG_CAN is not set
+# CONFIG_IRDA is not set
+# CONFIG_BT is not set
+# CONFIG_AF_RXRPC is not set
+# CONFIG_WIRELESS is not set
+# CONFIG_WIMAX is not set
+# CONFIG_RFKILL is not set
+# CONFIG_NET_9P is not set
+
+#
+# Device Drivers
+#
+
+#
+# Generic Driver Options
+#
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+# CONFIG_DEVTMPFS is not set
+CONFIG_STANDALONE=y
+CONFIG_PREVENT_FIRMWARE_BUILD=y
+CONFIG_FW_LOADER=y
+CONFIG_FIRMWARE_IN_KERNEL=y
+CONFIG_EXTRA_FIRMWARE=""
+# CONFIG_DEBUG_DRIVER is not set
+# CONFIG_DEBUG_DEVRES is not set
+# CONFIG_SYS_HYPERVISOR is not set
+# CONFIG_CONNECTOR is not set
+# CONFIG_MTD is not set
+# CONFIG_PARPORT is not set
+CONFIG_BLK_DEV=y
+# CONFIG_BLK_CPQ_DA is not set
+# CONFIG_BLK_CPQ_CISS_DA is not set
+# CONFIG_BLK_DEV_DAC960 is not set
+# CONFIG_BLK_DEV_UMEM is not set
+# CONFIG_BLK_DEV_COW_COMMON is not set
+# CONFIG_BLK_DEV_LOOP is not set
+
+#
+# DRBD disabled because PROC_FS, INET or CONNECTOR not selected
+#
+# CONFIG_BLK_DEV_NBD is not set
+# CONFIG_BLK_DEV_SX8 is not set
+# CONFIG_BLK_DEV_RAM is not set
+# CONFIG_CDROM_PKTCDVD is not set
+# CONFIG_ATA_OVER_ETH is not set
+# CONFIG_BLK_DEV_HD is not set
+CONFIG_MISC_DEVICES=y
+# CONFIG_AD525X_DPOT is not set
+# CONFIG_PHANTOM is not set
+# CONFIG_SGI_IOC4 is not set
+# CONFIG_TIFM_CORE is not set
+# CONFIG_ICS932S401 is not set
+# CONFIG_ENCLOSURE_SERVICES is not set
+# CONFIG_HP_ILO is not set
+# CONFIG_ISL29003 is not set
+# CONFIG_SENSORS_TSL2550 is not set
+# CONFIG_DS1682 is not set
+# CONFIG_C2PORT is not set
+
+#
+# EEPROM support
+#
+# CONFIG_EEPROM_AT24 is not set
+# CONFIG_EEPROM_LEGACY is not set
+# CONFIG_EEPROM_MAX6875 is not set
+# CONFIG_EEPROM_93CX6 is not set
+# CONFIG_CB710_CORE is not set
+CONFIG_HAVE_IDE=y
+CONFIG_IDE=y
+
+#
+# Please see Documentation/ide/ide.txt for help/info on IDE drives
+#
+# CONFIG_BLK_DEV_IDE_SATA is not set
+CONFIG_IDE_GD=y
+CONFIG_IDE_GD_ATA=y
+# CONFIG_IDE_GD_ATAPI is not set
+# CONFIG_BLK_DEV_IDECD is not set
+# CONFIG_BLK_DEV_IDETAPE is not set
+# CONFIG_IDE_TASK_IOCTL is not set
+CONFIG_IDE_PROC_FS=y
+
+#
+# IDE chipset support/bugfixes
+#
+# CONFIG_BLK_DEV_PLATFORM is not set
+
+#
+# PCI IDE chipsets support
+#
+# CONFIG_BLK_DEV_GENERIC is not set
+# CONFIG_BLK_DEV_OPTI621 is not set
+# CONFIG_BLK_DEV_AEC62XX is not set
+# CONFIG_BLK_DEV_ALI15X3 is not set
+# CONFIG_BLK_DEV_AMD74XX is not set
+# CONFIG_BLK_DEV_CMD64X is not set
+# CONFIG_BLK_DEV_TRIFLEX is not set
+# CONFIG_BLK_DEV_CS5520 is not set
+# CONFIG_BLK_DEV_CS5530 is not set
+# CONFIG_BLK_DEV_HPT366 is not set
+# CONFIG_BLK_DEV_JMICRON is not set
+# CONFIG_BLK_DEV_SC1200 is not set
+# CONFIG_BLK_DEV_PIIX is not set
+# CONFIG_BLK_DEV_IT8172 is not set
+# CONFIG_BLK_DEV_IT8213 is not set
+# CONFIG_BLK_DEV_IT821X is not set
+# CONFIG_BLK_DEV_NS87415 is not set
+# CONFIG_BLK_DEV_PDC202XX_OLD is not set
+# CONFIG_BLK_DEV_PDC202XX_NEW is not set
+# CONFIG_BLK_DEV_SVWKS is not set
+# CONFIG_BLK_DEV_SIIMAGE is not set
+# CONFIG_BLK_DEV_SLC90E66 is not set
+# CONFIG_BLK_DEV_TRM290 is not set
+# CONFIG_BLK_DEV_VIA82CXXX is not set
+# CONFIG_BLK_DEV_TC86C001 is not set
+# CONFIG_BLK_DEV_IDEDMA is not set
+
+#
+# SCSI device support
+#
+CONFIG_SCSI_MOD=y
+# CONFIG_RAID_ATTRS is not set
+CONFIG_SCSI=y
+CONFIG_SCSI_DMA=y
+# CONFIG_SCSI_TGT is not set
+# CONFIG_SCSI_NETLINK is not set
+CONFIG_SCSI_PROC_FS=y
+
+#
+# SCSI support type (disk, tape, CD-ROM)
+#
+CONFIG_BLK_DEV_SD=y
+# CONFIG_CHR_DEV_ST is not set
+# CONFIG_CHR_DEV_OSST is not set
+# CONFIG_BLK_DEV_SR is not set
+# CONFIG_CHR_DEV_SG is not set
+# CONFIG_CHR_DEV_SCH is not set
+# CONFIG_SCSI_MULTI_LUN is not set
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_LOGGING=y
+# CONFIG_SCSI_SCAN_ASYNC is not set
+CONFIG_SCSI_WAIT_SCAN=m
+
+#
+# SCSI Transports
+#
+# CONFIG_SCSI_SPI_ATTRS is not set
+# CONFIG_SCSI_FC_ATTRS is not set
+# CONFIG_SCSI_ISCSI_ATTRS is not set
+# CONFIG_SCSI_SAS_LIBSAS is not set
+# CONFIG_SCSI_SRP_ATTRS is not set
+CONFIG_SCSI_LOWLEVEL=y
+# CONFIG_ISCSI_TCP is not set
+# CONFIG_SCSI_BNX2_ISCSI is not set
+# CONFIG_BE2ISCSI is not set
+# CONFIG_BLK_DEV_3W_XXXX_RAID is not set
+# CONFIG_SCSI_HPSA is not set
+# CONFIG_SCSI_3W_9XXX is not set
+# CONFIG_SCSI_3W_SAS is not set
+# CONFIG_SCSI_ACARD is not set
+# CONFIG_SCSI_AACRAID is not set
+# CONFIG_SCSI_AIC7XXX is not set
+# CONFIG_SCSI_AIC7XXX_OLD is not set
+# CONFIG_SCSI_AIC79XX is not set
+# CONFIG_SCSI_AIC94XX is not set
+# CONFIG_SCSI_MVSAS is not set
+# CONFIG_SCSI_DPT_I2O is not set
+# CONFIG_SCSI_ADVANSYS is not set
+# CONFIG_SCSI_ARCMSR is not set
+# CONFIG_MEGARAID_NEWGEN is not set
+# CONFIG_MEGARAID_LEGACY is not set
+# CONFIG_MEGARAID_SAS is not set
+# CONFIG_SCSI_MPT2SAS is not set
+# CONFIG_SCSI_HPTIOP is not set
+# CONFIG_LIBFC is not set
+# CONFIG_LIBFCOE is not set
+# CONFIG_FCOE is not set
+# CONFIG_SCSI_DMX3191D is not set
+# CONFIG_SCSI_FUTURE_DOMAIN is not set
+# CONFIG_SCSI_IPS is not set
+# CONFIG_SCSI_INITIO is not set
+# CONFIG_SCSI_INIA100 is not set
+# CONFIG_SCSI_STEX is not set
+# CONFIG_SCSI_SYM53C8XX_2 is not set
+# CONFIG_SCSI_IPR is not set
+# CONFIG_SCSI_QLOGIC_1280 is not set
+# CONFIG_SCSI_QLA_FC is not set
+# CONFIG_SCSI_QLA_ISCSI is not set
+# CONFIG_SCSI_LPFC is not set
+# CONFIG_SCSI_DC395x is not set
+# CONFIG_SCSI_DC390T is not set
+# CONFIG_SCSI_NSP32 is not set
+# CONFIG_SCSI_DEBUG is not set
+# CONFIG_SCSI_PMCRAID is not set
+# CONFIG_SCSI_PM8001 is not set
+# CONFIG_SCSI_SRP is not set
+# CONFIG_SCSI_BFA_FC is not set
+# CONFIG_SCSI_LOWLEVEL_PCMCIA is not set
+# CONFIG_SCSI_DH is not set
+# CONFIG_SCSI_OSD_INITIATOR is not set
+CONFIG_ATA=y
+# CONFIG_ATA_NONSTANDARD is not set
+CONFIG_ATA_VERBOSE_ERROR=y
+CONFIG_SATA_PMP=y
+# CONFIG_SATA_AHCI is not set
+CONFIG_SATA_SIL24=y
+CONFIG_ATA_SFF=y
+# CONFIG_SATA_SVW is not set
+# CONFIG_ATA_PIIX is not set
+# CONFIG_SATA_MV is not set
+# CONFIG_SATA_NV is not set
+# CONFIG_PDC_ADMA is not set
+# CONFIG_SATA_QSTOR is not set
+# CONFIG_SATA_PROMISE is not set
+# CONFIG_SATA_SX4 is not set
+# CONFIG_SATA_SIL is not set
+# CONFIG_SATA_SIS is not set
+# CONFIG_SATA_ULI is not set
+# CONFIG_SATA_VIA is not set
+# CONFIG_SATA_VITESSE is not set
+# CONFIG_SATA_INIC162X is not set
+# CONFIG_PATA_ALI is not set
+# CONFIG_PATA_AMD is not set
+# CONFIG_PATA_ARTOP is not set
+# CONFIG_PATA_ATP867X is not set
+# CONFIG_PATA_ATIIXP is not set
+# CONFIG_PATA_CMD640_PCI is not set
+# CONFIG_PATA_CMD64X is not set
+# CONFIG_PATA_CS5520 is not set
+# CONFIG_PATA_CS5530 is not set
+# CONFIG_PATA_CYPRESS is not set
+# CONFIG_PATA_EFAR is not set
+# CONFIG_ATA_GENERIC is not set
+# CONFIG_PATA_HPT366 is not set
+# CONFIG_PATA_HPT37X is not set
+# CONFIG_PATA_HPT3X2N is not set
+# CONFIG_PATA_HPT3X3 is not set
+# CONFIG_PATA_IT821X is not set
+# CONFIG_PATA_IT8213 is not set
+# CONFIG_PATA_JMICRON is not set
+# CONFIG_PATA_LEGACY is not set
+# CONFIG_PATA_TRIFLEX is not set
+# CONFIG_PATA_MARVELL is not set
+# CONFIG_PATA_MPIIX is not set
+# CONFIG_PATA_OLDPIIX is not set
+# CONFIG_PATA_NETCELL is not set
+# CONFIG_PATA_NINJA32 is not set
+# CONFIG_PATA_NS87410 is not set
+# CONFIG_PATA_NS87415 is not set
+# CONFIG_PATA_OPTI is not set
+# CONFIG_PATA_OPTIDMA is not set
+# CONFIG_PATA_PDC2027X is not set
+# CONFIG_PATA_PDC_OLD is not set
+# CONFIG_PATA_RADISYS is not set
+# CONFIG_PATA_RDC is not set
+# CONFIG_PATA_RZ1000 is not set
+# CONFIG_PATA_SC1200 is not set
+# CONFIG_PATA_SERVERWORKS is not set
+# CONFIG_PATA_SIL680 is not set
+# CONFIG_PATA_SIS is not set
+# CONFIG_PATA_TOSHIBA is not set
+# CONFIG_PATA_VIA is not set
+# CONFIG_PATA_WINBOND is not set
+# CONFIG_PATA_PLATFORM is not set
+# CONFIG_PATA_SCH is not set
+# CONFIG_MD is not set
+# CONFIG_FUSION is not set
+
+#
+# IEEE 1394 (FireWire) support
+#
+
+#
+# You can enable one or both FireWire driver stacks.
+#
+
+#
+# The newer stack is recommended.
+#
+# CONFIG_FIREWIRE is not set
+# CONFIG_IEEE1394 is not set
+# CONFIG_I2O is not set
+CONFIG_NETDEVICES=y
+# CONFIG_DUMMY is not set
+# CONFIG_BONDING is not set
+# CONFIG_MACVLAN is not set
+# CONFIG_EQUALIZER is not set
+CONFIG_TUN=y
+# CONFIG_VETH is not set
+# CONFIG_ARCNET is not set
+# CONFIG_NET_ETHERNET is not set
+CONFIG_NETDEV_1000=y
+# CONFIG_ACENIC is not set
+# CONFIG_DL2K is not set
+# CONFIG_E1000 is not set
+CONFIG_E1000E=y
+# CONFIG_IP1000 is not set
+# CONFIG_IGB is not set
+# CONFIG_IGBVF is not set
+# CONFIG_NS83820 is not set
+# CONFIG_HAMACHI is not set
+# CONFIG_YELLOWFIN is not set
+# CONFIG_R8169 is not set
+# CONFIG_SIS190 is not set
+# CONFIG_SKGE is not set
+# CONFIG_SKY2 is not set
+# CONFIG_VIA_VELOCITY is not set
+# CONFIG_TIGON3 is not set
+# CONFIG_BNX2 is not set
+# CONFIG_CNIC is not set
+# CONFIG_QLA3XXX is not set
+# CONFIG_ATL1 is not set
+# CONFIG_ATL1E is not set
+# CONFIG_ATL1C is not set
+# CONFIG_JME is not set
+# CONFIG_NETDEV_10000 is not set
+# CONFIG_TR is not set
+# CONFIG_WLAN is not set
+
+#
+# Enable WiMAX (Networking options) to see the WiMAX drivers
+#
+# CONFIG_WAN is not set
+# CONFIG_FDDI is not set
+# CONFIG_HIPPI is not set
+# CONFIG_PPP is not set
+# CONFIG_SLIP is not set
+# CONFIG_NET_FC is not set
+# CONFIG_NETCONSOLE is not set
+# CONFIG_NETPOLL is not set
+# CONFIG_NET_POLL_CONTROLLER is not set
+# CONFIG_VMXNET3 is not set
+# CONFIG_ISDN is not set
+# CONFIG_PHONE is not set
+
+#
+# Input device support
+#
+CONFIG_INPUT=y
+# CONFIG_INPUT_FF_MEMLESS is not set
+# CONFIG_INPUT_POLLDEV is not set
+# CONFIG_INPUT_SPARSEKMAP is not set
+
+#
+# Userland interfaces
+#
+# CONFIG_INPUT_MOUSEDEV is not set
+# CONFIG_INPUT_JOYDEV is not set
+# CONFIG_INPUT_EVDEV is not set
+# CONFIG_INPUT_EVBUG is not set
+
+#
+# Input Device Drivers
+#
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_INPUT_JOYSTICK is not set
+# CONFIG_INPUT_TABLET is not set
+# CONFIG_INPUT_TOUCHSCREEN is not set
+# CONFIG_INPUT_MISC is not set
+
+#
+# Hardware I/O ports
+#
+# CONFIG_SERIO is not set
+# CONFIG_GAMEPORT is not set
+
+#
+# Character devices
+#
+# CONFIG_VT is not set
+CONFIG_DEVKMEM=y
+# CONFIG_SERIAL_NONSTANDARD is not set
+# CONFIG_NOZOMI is not set
+
+#
+# Serial drivers
+#
+# CONFIG_SERIAL_8250 is not set
+
+#
+# Non-8250 serial port support
+#
+# CONFIG_SERIAL_JSM is not set
+# CONFIG_SERIAL_TIMBERDALE is not set
+CONFIG_UNIX98_PTYS=y
+# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set
+# CONFIG_LEGACY_PTYS is not set
+CONFIG_HVC_DRIVER=y
+# CONFIG_IPMI_HANDLER is not set
+# CONFIG_HW_RANDOM is not set
+# CONFIG_R3964 is not set
+# CONFIG_APPLICOM is not set
+
+#
+# PCMCIA character devices
+#
+# CONFIG_RAW_DRIVER is not set
+# CONFIG_TCG_TPM is not set
+CONFIG_I2C=y
+CONFIG_I2C_BOARDINFO=y
+CONFIG_I2C_COMPAT=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_HELPER_AUTO=y
+
+#
+# I2C Hardware Bus support
+#
+
+#
+# PC SMBus host controller drivers
+#
+# CONFIG_I2C_ALI1535 is not set
+# CONFIG_I2C_ALI1563 is not set
+# CONFIG_I2C_ALI15X3 is not set
+# CONFIG_I2C_AMD756 is not set
+# CONFIG_I2C_AMD8111 is not set
+# CONFIG_I2C_I801 is not set
+# CONFIG_I2C_ISCH is not set
+# CONFIG_I2C_PIIX4 is not set
+# CONFIG_I2C_NFORCE2 is not set
+# CONFIG_I2C_SIS5595 is not set
+# CONFIG_I2C_SIS630 is not set
+# CONFIG_I2C_SIS96X is not set
+# CONFIG_I2C_VIA is not set
+# CONFIG_I2C_VIAPRO is not set
+
+#
+# I2C system bus drivers (mostly embedded / system-on-chip)
+#
+# CONFIG_I2C_OCORES is not set
+# CONFIG_I2C_SIMTEC is not set
+# CONFIG_I2C_XILINX is not set
+
+#
+# External I2C/SMBus adapter drivers
+#
+# CONFIG_I2C_PARPORT_LIGHT is not set
+# CONFIG_I2C_TAOS_EVM is not set
+
+#
+# Other I2C/SMBus bus drivers
+#
+# CONFIG_I2C_PCA_PLATFORM is not set
+# CONFIG_I2C_STUB is not set
+# CONFIG_I2C_DEBUG_CORE is not set
+# CONFIG_I2C_DEBUG_ALGO is not set
+# CONFIG_I2C_DEBUG_BUS is not set
+# CONFIG_SPI is not set
+
+#
+# PPS support
+#
+# CONFIG_PPS is not set
+# CONFIG_W1 is not set
+# CONFIG_POWER_SUPPLY is not set
+# CONFIG_HWMON is not set
+# CONFIG_THERMAL is not set
+CONFIG_WATCHDOG=y
+CONFIG_WATCHDOG_NOWAYOUT=y
+
+#
+# Watchdog Device Drivers
+#
+# CONFIG_SOFT_WATCHDOG is not set
+# CONFIG_ALIM7101_WDT is not set
+
+#
+# PCI-based Watchdog Cards
+#
+# CONFIG_PCIPCWATCHDOG is not set
+# CONFIG_WDTPCI is not set
+CONFIG_SSB_POSSIBLE=y
+
+#
+# Sonics Silicon Backplane
+#
+# CONFIG_SSB is not set
+
+#
+# Multifunction device drivers
+#
+# CONFIG_MFD_CORE is not set
+# CONFIG_MFD_88PM860X is not set
+# CONFIG_MFD_SM501 is not set
+# CONFIG_HTC_PASIC3 is not set
+# CONFIG_TWL4030_CORE is not set
+# CONFIG_MFD_TMIO is not set
+# CONFIG_PMIC_DA903X is not set
+# CONFIG_PMIC_ADP5520 is not set
+# CONFIG_MFD_MAX8925 is not set
+# CONFIG_MFD_WM8400 is not set
+# CONFIG_MFD_WM831X is not set
+# CONFIG_MFD_WM8350_I2C is not set
+# CONFIG_MFD_WM8994 is not set
+# CONFIG_MFD_PCF50633 is not set
+# CONFIG_AB3100_CORE is not set
+# CONFIG_LPC_SCH is not set
+# CONFIG_REGULATOR is not set
+# CONFIG_MEDIA_SUPPORT is not set
+
+#
+# Graphics support
+#
+# CONFIG_VGA_ARB is not set
+# CONFIG_DRM is not set
+# CONFIG_VGASTATE is not set
+# CONFIG_VIDEO_OUTPUT_CONTROL is not set
+# CONFIG_FB is not set
+# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
+
+#
+# Display device support
+#
+# CONFIG_DISPLAY_SUPPORT is not set
+# CONFIG_SOUND is not set
+# CONFIG_HID_SUPPORT is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_UWB is not set
+# CONFIG_MMC is not set
+# CONFIG_MEMSTICK is not set
+# CONFIG_NEW_LEDS is not set
+# CONFIG_ACCESSIBILITY is not set
+# CONFIG_INFINIBAND is not set
+CONFIG_RTC_LIB=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_HCTOSYS=y
+CONFIG_RTC_HCTOSYS_DEVICE="rtc0"
+# CONFIG_RTC_DEBUG is not set
+
+#
+# RTC interfaces
+#
+# CONFIG_RTC_INTF_SYSFS is not set
+# CONFIG_RTC_INTF_PROC is not set
+CONFIG_RTC_INTF_DEV=y
+# CONFIG_RTC_INTF_DEV_UIE_EMUL is not set
+# CONFIG_RTC_DRV_TEST is not set
+
+#
+# I2C RTC drivers
+#
+# CONFIG_RTC_DRV_DS1307 is not set
+# CONFIG_RTC_DRV_DS1374 is not set
+# CONFIG_RTC_DRV_DS1672 is not set
+# CONFIG_RTC_DRV_MAX6900 is not set
+# CONFIG_RTC_DRV_RS5C372 is not set
+# CONFIG_RTC_DRV_ISL1208 is not set
+# CONFIG_RTC_DRV_X1205 is not set
+# CONFIG_RTC_DRV_PCF8563 is not set
+# CONFIG_RTC_DRV_PCF8583 is not set
+# CONFIG_RTC_DRV_M41T80 is not set
+# CONFIG_RTC_DRV_BQ32K is not set
+# CONFIG_RTC_DRV_S35390A is not set
+# CONFIG_RTC_DRV_FM3130 is not set
+# CONFIG_RTC_DRV_RX8581 is not set
+# CONFIG_RTC_DRV_RX8025 is not set
+
+#
+# SPI RTC drivers
+#
+
+#
+# Platform RTC drivers
+#
+# CONFIG_RTC_DRV_DS1286 is not set
+# CONFIG_RTC_DRV_DS1511 is not set
+# CONFIG_RTC_DRV_DS1553 is not set
+# CONFIG_RTC_DRV_DS1742 is not set
+# CONFIG_RTC_DRV_STK17TA8 is not set
+# CONFIG_RTC_DRV_M48T86 is not set
+# CONFIG_RTC_DRV_M48T35 is not set
+# CONFIG_RTC_DRV_M48T59 is not set
+# CONFIG_RTC_DRV_MSM6242 is not set
+# CONFIG_RTC_DRV_BQ4802 is not set
+# CONFIG_RTC_DRV_RP5C01 is not set
+# CONFIG_RTC_DRV_V3020 is not set
+
+#
+# on-CPU RTC drivers
+#
+# CONFIG_DMADEVICES is not set
+# CONFIG_AUXDISPLAY is not set
+# CONFIG_UIO is not set
+
+#
+# TI VLYNQ
+#
+# CONFIG_STAGING is not set
+
+#
+# File systems
+#
+CONFIG_EXT2_FS=y
+# CONFIG_EXT2_FS_XATTR is not set
+# CONFIG_EXT2_FS_XIP is not set
+CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
+CONFIG_EXT3_FS_XATTR=y
+# CONFIG_EXT3_FS_POSIX_ACL is not set
+# CONFIG_EXT3_FS_SECURITY is not set
+# CONFIG_EXT4_FS is not set
+CONFIG_JBD=y
+CONFIG_FS_MBCACHE=y
+# CONFIG_REISERFS_FS is not set
+# CONFIG_JFS_FS is not set
+# CONFIG_FS_POSIX_ACL is not set
+# CONFIG_XFS_FS is not set
+# CONFIG_GFS2_FS is not set
+# CONFIG_OCFS2_FS is not set
+# CONFIG_BTRFS_FS is not set
+# CONFIG_NILFS2_FS is not set
+CONFIG_FILE_LOCKING=y
+CONFIG_FSNOTIFY=y
+CONFIG_DNOTIFY=y
+# CONFIG_INOTIFY is not set
+CONFIG_INOTIFY_USER=y
+# CONFIG_QUOTA is not set
+# CONFIG_AUTOFS_FS is not set
+# CONFIG_AUTOFS4_FS is not set
+CONFIG_FUSE_FS=y
+# CONFIG_CUSE is not set
+
+#
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
+# CD-ROM/DVD Filesystems
+#
+# CONFIG_ISO9660_FS is not set
+# CONFIG_UDF_FS is not set
+
+#
+# DOS/FAT/NT Filesystems
+#
+CONFIG_FAT_FS=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=m
+CONFIG_FAT_DEFAULT_CODEPAGE=437
+CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
+# CONFIG_NTFS_FS is not set
+
+#
+# Pseudo filesystems
+#
+CONFIG_PROC_FS=y
+# CONFIG_PROC_KCORE is not set
+CONFIG_PROC_SYSCTL=y
+CONFIG_PROC_PAGE_MONITOR=y
+CONFIG_SYSFS=y
+CONFIG_TMPFS=y
+# CONFIG_TMPFS_POSIX_ACL is not set
+CONFIG_HUGETLBFS=y
+CONFIG_HUGETLB_PAGE=y
+# CONFIG_CONFIGFS_FS is not set
+CONFIG_MISC_FILESYSTEMS=y
+# CONFIG_ADFS_FS is not set
+# CONFIG_AFFS_FS is not set
+# CONFIG_HFS_FS is not set
+# CONFIG_HFSPLUS_FS is not set
+# CONFIG_BEFS_FS is not set
+# CONFIG_BFS_FS is not set
+# CONFIG_EFS_FS is not set
+# CONFIG_LOGFS is not set
+# CONFIG_CRAMFS is not set
+# CONFIG_SQUASHFS is not set
+# CONFIG_VXFS_FS is not set
+# CONFIG_MINIX_FS is not set
+# CONFIG_OMFS_FS is not set
+# CONFIG_HPFS_FS is not set
+# CONFIG_QNX4FS_FS is not set
+# CONFIG_ROMFS_FS is not set
+# CONFIG_SYSV_FS is not set
+# CONFIG_UFS_FS is not set
+CONFIG_NETWORK_FILESYSTEMS=y
+CONFIG_NFS_FS=m
+CONFIG_NFS_V3=y
+# CONFIG_NFS_V3_ACL is not set
+# CONFIG_NFS_V4 is not set
+# CONFIG_NFSD is not set
+CONFIG_LOCKD=m
+CONFIG_LOCKD_V4=y
+CONFIG_NFS_COMMON=y
+CONFIG_SUNRPC=m
+# CONFIG_RPCSEC_GSS_KRB5 is not set
+# CONFIG_RPCSEC_GSS_SPKM3 is not set
+# CONFIG_SMB_FS is not set
+# CONFIG_CEPH_FS is not set
+# CONFIG_CIFS is not set
+# CONFIG_NCP_FS is not set
+# CONFIG_CODA_FS is not set
+# CONFIG_AFS_FS is not set
+
+#
+# Partition Types
+#
+# CONFIG_PARTITION_ADVANCED is not set
+CONFIG_MSDOS_PARTITION=y
+CONFIG_NLS=y
+CONFIG_NLS_DEFAULT="iso8859-1"
+CONFIG_NLS_CODEPAGE_437=y
+# CONFIG_NLS_CODEPAGE_737 is not set
+# CONFIG_NLS_CODEPAGE_775 is not set
+# CONFIG_NLS_CODEPAGE_850 is not set
+# CONFIG_NLS_CODEPAGE_852 is not set
+# CONFIG_NLS_CODEPAGE_855 is not set
+# CONFIG_NLS_CODEPAGE_857 is not set
+# CONFIG_NLS_CODEPAGE_860 is not set
+# CONFIG_NLS_CODEPAGE_861 is not set
+# CONFIG_NLS_CODEPAGE_862 is not set
+# CONFIG_NLS_CODEPAGE_863 is not set
+# CONFIG_NLS_CODEPAGE_864 is not set
+# CONFIG_NLS_CODEPAGE_865 is not set
+# CONFIG_NLS_CODEPAGE_866 is not set
+# CONFIG_NLS_CODEPAGE_869 is not set
+# CONFIG_NLS_CODEPAGE_936 is not set
+# CONFIG_NLS_CODEPAGE_950 is not set
+# CONFIG_NLS_CODEPAGE_932 is not set
+# CONFIG_NLS_CODEPAGE_949 is not set
+# CONFIG_NLS_CODEPAGE_874 is not set
+# CONFIG_NLS_ISO8859_8 is not set
+# CONFIG_NLS_CODEPAGE_1250 is not set
+# CONFIG_NLS_CODEPAGE_1251 is not set
+# CONFIG_NLS_ASCII is not set
+CONFIG_NLS_ISO8859_1=y
+# CONFIG_NLS_ISO8859_2 is not set
+# CONFIG_NLS_ISO8859_3 is not set
+# CONFIG_NLS_ISO8859_4 is not set
+# CONFIG_NLS_ISO8859_5 is not set
+# CONFIG_NLS_ISO8859_6 is not set
+# CONFIG_NLS_ISO8859_7 is not set
+# CONFIG_NLS_ISO8859_9 is not set
+# CONFIG_NLS_ISO8859_13 is not set
+# CONFIG_NLS_ISO8859_14 is not set
+# CONFIG_NLS_ISO8859_15 is not set
+# CONFIG_NLS_KOI8_R is not set
+# CONFIG_NLS_KOI8_U is not set
+# CONFIG_NLS_UTF8 is not set
+# CONFIG_DLM is not set
+
+#
+# Kernel hacking
+#
+# CONFIG_PRINTK_TIME is not set
+CONFIG_ENABLE_WARN_DEPRECATED=y
+CONFIG_ENABLE_MUST_CHECK=y
+CONFIG_FRAME_WARN=2048
+CONFIG_MAGIC_SYSRQ=y
+# CONFIG_STRIP_ASM_SYMS is not set
+# CONFIG_UNUSED_SYMBOLS is not set
+# CONFIG_DEBUG_FS is not set
+# CONFIG_HEADERS_CHECK is not set
+CONFIG_DEBUG_KERNEL=y
+# CONFIG_DEBUG_SHIRQ is not set
+CONFIG_DETECT_SOFTLOCKUP=y
+# CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
+CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0
+CONFIG_DETECT_HUNG_TASK=y
+# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set
+CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0
+CONFIG_SCHED_DEBUG=y
+# CONFIG_SCHEDSTATS is not set
+# CONFIG_TIMER_STATS is not set
+# CONFIG_DEBUG_OBJECTS is not set
+# CONFIG_SLUB_DEBUG_ON is not set
+# CONFIG_SLUB_STATS is not set
+# CONFIG_DEBUG_RT_MUTEXES is not set
+# CONFIG_RT_MUTEX_TESTER is not set
+# CONFIG_DEBUG_SPINLOCK is not set
+# CONFIG_DEBUG_MUTEXES is not set
+# CONFIG_DEBUG_LOCK_ALLOC is not set
+# CONFIG_PROVE_LOCKING is not set
+# CONFIG_LOCK_STAT is not set
+CONFIG_DEBUG_SPINLOCK_SLEEP=y
+# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
+CONFIG_STACKTRACE=y
+# CONFIG_DEBUG_KOBJECT is not set
+# CONFIG_DEBUG_HIGHMEM is not set
+CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_VM=y
+# CONFIG_DEBUG_WRITECOUNT is not set
+# CONFIG_DEBUG_MEMORY_INIT is not set
+# CONFIG_DEBUG_LIST is not set
+# CONFIG_DEBUG_SG is not set
+# CONFIG_DEBUG_NOTIFIERS is not set
+# CONFIG_DEBUG_CREDENTIALS is not set
+# CONFIG_RCU_TORTURE_TEST is not set
+# CONFIG_RCU_CPU_STALL_DETECTOR is not set
+# CONFIG_BACKTRACE_SELF_TEST is not set
+# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
+# CONFIG_DEBUG_FORCE_WEAK_PER_CPU is not set
+# CONFIG_FAULT_INJECTION is not set
+# CONFIG_SYSCTL_SYSCALL_CHECK is not set
+# CONFIG_PAGE_POISONING is not set
+CONFIG_RING_BUFFER=y
+CONFIG_RING_BUFFER_ALLOW_SWAP=y
+CONFIG_TRACING_SUPPORT=y
+CONFIG_FTRACE=y
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_ENABLE_DEFAULT_TRACERS is not set
+# CONFIG_BOOT_TRACER is not set
+CONFIG_BRANCH_PROFILE_NONE=y
+# CONFIG_PROFILE_ANNOTATED_BRANCHES is not set
+# CONFIG_PROFILE_ALL_BRANCHES is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_RING_BUFFER_BENCHMARK is not set
+# CONFIG_SAMPLES is not set
+CONFIG_EARLY_PRINTK=y
+CONFIG_DEBUG_STACKOVERFLOW=y
+# CONFIG_DEBUG_STACK_USAGE is not set
+CONFIG_DEBUG_EXTRA_FLAGS="-femit-struct-debug-baseonly"
+
+#
+# Security options
+#
+# CONFIG_KEYS is not set
+# CONFIG_SECURITY is not set
+# CONFIG_SECURITYFS is not set
+# CONFIG_DEFAULT_SECURITY_SELINUX is not set
+# CONFIG_DEFAULT_SECURITY_SMACK is not set
+# CONFIG_DEFAULT_SECURITY_TOMOYO is not set
+CONFIG_DEFAULT_SECURITY_DAC=y
+CONFIG_DEFAULT_SECURITY=""
+CONFIG_CRYPTO=y
+
+#
+# Crypto core or helper
+#
+# CONFIG_CRYPTO_FIPS is not set
+CONFIG_CRYPTO_ALGAPI=m
+CONFIG_CRYPTO_ALGAPI2=m
+CONFIG_CRYPTO_RNG=m
+CONFIG_CRYPTO_RNG2=m
+# CONFIG_CRYPTO_MANAGER is not set
+# CONFIG_CRYPTO_MANAGER2 is not set
+# CONFIG_CRYPTO_GF128MUL is not set
+# CONFIG_CRYPTO_NULL is not set
+# CONFIG_CRYPTO_PCRYPT is not set
+# CONFIG_CRYPTO_CRYPTD is not set
+# CONFIG_CRYPTO_AUTHENC is not set
+# CONFIG_CRYPTO_TEST is not set
+
+#
+# Authenticated Encryption with Associated Data
+#
+# CONFIG_CRYPTO_CCM is not set
+# CONFIG_CRYPTO_GCM is not set
+# CONFIG_CRYPTO_SEQIV is not set
+
+#
+# Block modes
+#
+# CONFIG_CRYPTO_CBC is not set
+# CONFIG_CRYPTO_CTR is not set
+# CONFIG_CRYPTO_CTS is not set
+# CONFIG_CRYPTO_ECB is not set
+# CONFIG_CRYPTO_LRW is not set
+# CONFIG_CRYPTO_PCBC is not set
+# CONFIG_CRYPTO_XTS is not set
+
+#
+# Hash modes
+#
+# CONFIG_CRYPTO_HMAC is not set
+# CONFIG_CRYPTO_XCBC is not set
+# CONFIG_CRYPTO_VMAC is not set
+
+#
+# Digest
+#
+# CONFIG_CRYPTO_CRC32C is not set
+# CONFIG_CRYPTO_GHASH is not set
+# CONFIG_CRYPTO_MD4 is not set
+# CONFIG_CRYPTO_MD5 is not set
+# CONFIG_CRYPTO_MICHAEL_MIC is not set
+# CONFIG_CRYPTO_RMD128 is not set
+# CONFIG_CRYPTO_RMD160 is not set
+# CONFIG_CRYPTO_RMD256 is not set
+# CONFIG_CRYPTO_RMD320 is not set
+# CONFIG_CRYPTO_SHA1 is not set
+# CONFIG_CRYPTO_SHA256 is not set
+# CONFIG_CRYPTO_SHA512 is not set
+# CONFIG_CRYPTO_TGR192 is not set
+# CONFIG_CRYPTO_WP512 is not set
+
+#
+# Ciphers
+#
+CONFIG_CRYPTO_AES=m
+# CONFIG_CRYPTO_ANUBIS is not set
+# CONFIG_CRYPTO_ARC4 is not set
+# CONFIG_CRYPTO_BLOWFISH is not set
+# CONFIG_CRYPTO_CAMELLIA is not set
+# CONFIG_CRYPTO_CAST5 is not set
+# CONFIG_CRYPTO_CAST6 is not set
+# CONFIG_CRYPTO_DES is not set
+# CONFIG_CRYPTO_FCRYPT is not set
+# CONFIG_CRYPTO_KHAZAD is not set
+# CONFIG_CRYPTO_SALSA20 is not set
+# CONFIG_CRYPTO_SEED is not set
+# CONFIG_CRYPTO_SERPENT is not set
+# CONFIG_CRYPTO_TEA is not set
+# CONFIG_CRYPTO_TWOFISH is not set
+
+#
+# Compression
+#
+# CONFIG_CRYPTO_DEFLATE is not set
+# CONFIG_CRYPTO_ZLIB is not set
+# CONFIG_CRYPTO_LZO is not set
+
+#
+# Random Number Generation
+#
+CONFIG_CRYPTO_ANSI_CPRNG=m
+CONFIG_CRYPTO_HW=y
+# CONFIG_CRYPTO_DEV_HIFN_795X is not set
+# CONFIG_BINARY_PRINTF is not set
+
+#
+# Library routines
+#
+CONFIG_BITREVERSE=y
+CONFIG_GENERIC_FIND_FIRST_BIT=y
+CONFIG_GENERIC_FIND_NEXT_BIT=y
+CONFIG_GENERIC_FIND_LAST_BIT=y
+# CONFIG_CRC_CCITT is not set
+# CONFIG_CRC16 is not set
+# CONFIG_CRC_T10DIF is not set
+# CONFIG_CRC_ITU_T is not set
+CONFIG_CRC32=y
+# CONFIG_CRC7 is not set
+# CONFIG_LIBCRC32C is not set
+CONFIG_ZLIB_INFLATE=y
+CONFIG_DECOMPRESS_GZIP=y
+CONFIG_HAS_IOMEM=y
+CONFIG_HAS_IOPORT=y
+CONFIG_HAS_DMA=y
+CONFIG_NLATTR=y
diff --git a/arch/tile/include/arch/abi.h b/arch/tile/include/arch/abi.h
new file mode 100644
index 0000000..7cdc47b
--- /dev/null
+++ b/arch/tile/include/arch/abi.h
@@ -0,0 +1,93 @@
+// Copyright 2010 Tilera Corporation. All Rights Reserved.
+//
+//   This program is free software; you can redistribute it and/or
+//   modify it under the terms of the GNU General Public License
+//   as published by the Free Software Foundation, version 2.
+//
+//   This program is distributed in the hope that it will be useful, but
+//   WITHOUT ANY WARRANTY; without even the implied warranty of
+//   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+//   NON INFRINGEMENT.  See the GNU General Public License for
+//   more details.
+
+//! @file
+//!
+//! ABI-related register definitions helpful when writing assembly code.
+//!
+
+#ifndef __ARCH_ABI_H__
+#define __ARCH_ABI_H__
+
+#include <arch/chip.h>
+
+// Registers 0 - 55 are "normal", but some perform special roles.
+
+#define TREG_FP       52   /**< Frame pointer. */
+#define TREG_TP       53   /**< Thread pointer. */
+#define TREG_SP       54   /**< Stack pointer. */
+#define TREG_LR       55   /**< Link to calling function PC. */
+
+/** Index of last normal general-purpose register. */
+#define TREG_LAST_GPR 55
+
+// Registers 56 - 62 are "special" network registers.
+
+#define TREG_SN       56   /**< Static network access. */
+#define TREG_IDN0     57   /**< IDN demux 0 access. */
+#define TREG_IDN1     58   /**< IDN demux 1 access. */
+#define TREG_UDN0     59   /**< UDN demux 0 access. */
+#define TREG_UDN1     60   /**< UDN demux 1 access. */
+#define TREG_UDN2     61   /**< UDN demux 2 access. */
+#define TREG_UDN3     62   /**< UDN demux 3 access. */
+
+// Register 63 is the "special" zero register.
+
+#define TREG_ZERO     63   /**< "Zero" register; always reads as "0". */
+
+
+/** By convention, this register is used to hold the syscall number. */
+#define TREG_SYSCALL_NR      10
+
+/** Name of register that holds the syscall number, for use in assembly. */
+#define TREG_SYSCALL_NR_NAME r10
+
+
+//! The ABI requires callers to allocate a caller state save area of
+//! this many bytes at the bottom of each stack frame.
+//!
+#ifdef __tile__
+#define C_ABI_SAVE_AREA_SIZE (2 * __SIZEOF_POINTER__)
+#endif
+
+//! The operand to an 'info' opcode directing the backtracer to not
+//! try to find the calling frame.
+//!
+#define INFO_OP_CANNOT_BACKTRACE 2
+
+#ifndef __ASSEMBLER__
+#if CHIP_WORD_SIZE() > 32
+
+//! Unsigned type that can hold a register.
+typedef unsigned long long uint_reg_t;
+
+//! Signed type that can hold a register.
+typedef long long int_reg_t;
+
+//! String prefix to use for printf().
+#define INT_REG_FMT "ll"
+
+#elif !defined(__LP64__)   /* avoid confusion with LP64 cross-build tools */
+
+//! Unsigned type that can hold a register.
+typedef unsigned long uint_reg_t;
+
+//! Signed type that can hold a register.
+typedef long int_reg_t;
+
+//! String prefix to use for printf().
+#define INT_REG_FMT "l"
+
+#endif
+#endif /* __ASSEMBLER__ */
+
+#endif // !__ARCH_ABI_H__
diff --git a/arch/tile/include/arch/chip.h b/arch/tile/include/arch/chip.h
new file mode 100644
index 0000000..926d3db
--- /dev/null
+++ b/arch/tile/include/arch/chip.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#if __tile_chip__ == 0
+#include <arch/chip_tile64.h>
+#elif __tile_chip__ == 1
+#include <arch/chip_tilepro.h>
+#elif defined(__tilegx__)
+#include <arch/chip_tilegx.h>
+#else
+#error Unexpected Tilera chip type
+#endif
diff --git a/arch/tile/include/arch/chip_tile64.h b/arch/tile/include/arch/chip_tile64.h
new file mode 100644
index 0000000..18b5bc8
--- /dev/null
+++ b/arch/tile/include/arch/chip_tile64.h
@@ -0,0 +1,252 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+/*
+ * @file
+ * Global header file.
+ * This header file specifies defines for TILE64.
+ */
+
+#ifndef __ARCH_CHIP_H__
+#define __ARCH_CHIP_H__
+
+/** Specify chip version.
+ * When possible, prefer the CHIP_xxx symbols below for future-proofing.
+ * This is intended for cross-compiling; native compilation should
+ * use the predefined __tile_chip__ symbol.
+ */
+#define TILE_CHIP 0
+
+/** Specify chip revision.
+ * This provides for the case of a respin of a particular chip type;
+ * the normal value for this symbol is "0".
+ * This is intended for cross-compiling; native compilation should
+ * use the predefined __tile_chip_rev__ symbol.
+ */
+#define TILE_CHIP_REV 0
+
+/** The name of this architecture. */
+#define CHIP_ARCH_NAME "tile64"
+
+/** The ELF e_machine type for binaries for this chip. */
+#define CHIP_ELF_TYPE() EM_TILE64
+
+/** The alternate ELF e_machine type for binaries for this chip. */
+#define CHIP_COMPAT_ELF_TYPE() 0x2506
+
+/** What is the native word size of the machine? */
+#define CHIP_WORD_SIZE() 32
+
+/** How many bits of a virtual address are used. Extra bits must be
+ * the sign extension of the low bits.
+ */
+#define CHIP_VA_WIDTH() 32
+
+/** How many bits are in a physical address? */
+#define CHIP_PA_WIDTH() 36
+
+/** Size of the L2 cache, in bytes. */
+#define CHIP_L2_CACHE_SIZE() 65536
+
+/** Log size of an L2 cache line in bytes. */
+#define CHIP_L2_LOG_LINE_SIZE() 6
+
+/** Size of an L2 cache line, in bytes. */
+#define CHIP_L2_LINE_SIZE() (1 << CHIP_L2_LOG_LINE_SIZE())
+
+/** Associativity of the L2 cache. */
+#define CHIP_L2_ASSOC() 2
+
+/** Size of the L1 data cache, in bytes. */
+#define CHIP_L1D_CACHE_SIZE() 8192
+
+/** Log size of an L1 data cache line in bytes. */
+#define CHIP_L1D_LOG_LINE_SIZE() 4
+
+/** Size of an L1 data cache line, in bytes. */
+#define CHIP_L1D_LINE_SIZE() (1 << CHIP_L1D_LOG_LINE_SIZE())
+
+/** Associativity of the L1 data cache. */
+#define CHIP_L1D_ASSOC() 2
+
+/** Size of the L1 instruction cache, in bytes. */
+#define CHIP_L1I_CACHE_SIZE() 8192
+
+/** Log size of an L1 instruction cache line in bytes. */
+#define CHIP_L1I_LOG_LINE_SIZE() 6
+
+/** Size of an L1 instruction cache line, in bytes. */
+#define CHIP_L1I_LINE_SIZE() (1 << CHIP_L1I_LOG_LINE_SIZE())
+
+/** Associativity of the L1 instruction cache. */
+#define CHIP_L1I_ASSOC() 1
+
+/** Stride with which flush instructions must be issued. */
+#define CHIP_FLUSH_STRIDE() CHIP_L2_LINE_SIZE()
+
+/** Stride with which inv instructions must be issued. */
+#define CHIP_INV_STRIDE() CHIP_L1D_LINE_SIZE()
+
+/** Stride with which finv instructions must be issued. */
+#define CHIP_FINV_STRIDE() CHIP_L1D_LINE_SIZE()
+
+/** Can the local cache coherently cache data that is homed elsewhere? */
+#define CHIP_HAS_COHERENT_LOCAL_CACHE() 0
+
+/** How many simultaneous outstanding victims can the L2 cache have? */
+#define CHIP_MAX_OUTSTANDING_VICTIMS() 2
+
+/** Does the TLB support the NC and NOALLOC bits? */
+#define CHIP_HAS_NC_AND_NOALLOC_BITS() 0
+
+/** Does the chip support hash-for-home caching? */
+#define CHIP_HAS_CBOX_HOME_MAP() 0
+
+/** Number of entries in the chip's home map tables. */
+/* #define CHIP_CBOX_HOME_MAP_SIZE() -- does not apply to chip 0 */
+
+/** Do uncacheable requests miss in the cache regardless of whether
+ * there is matching data? */
+#define CHIP_HAS_ENFORCED_UNCACHEABLE_REQUESTS() 0
+
+/** Does the mf instruction wait for victims? */
+#define CHIP_HAS_MF_WAITS_FOR_VICTIMS() 1
+
+/** Does the chip have an "inv" instruction that doesn't also flush? */
+#define CHIP_HAS_INV() 0
+
+/** Does the chip have a "wh64" instruction? */
+#define CHIP_HAS_WH64() 0
+
+/** Does this chip have a 'dword_align' instruction? */
+#define CHIP_HAS_DWORD_ALIGN() 0
+
+/** Number of performance counters. */
+#define CHIP_PERFORMANCE_COUNTERS() 2
+
+/** Does this chip have auxiliary performance counters? */
+#define CHIP_HAS_AUX_PERF_COUNTERS() 0
+
+/** Is the CBOX_MSR1 SPR supported? */
+#define CHIP_HAS_CBOX_MSR1() 0
+
+/** Is the TILE_RTF_HWM SPR supported? */
+#define CHIP_HAS_TILE_RTF_HWM() 0
+
+/** Is the TILE_WRITE_PENDING SPR supported? */
+#define CHIP_HAS_TILE_WRITE_PENDING() 0
+
+/** Is the PROC_STATUS SPR supported? */
+#define CHIP_HAS_PROC_STATUS_SPR() 0
+
+/** Log of the number of mshims we have. */
+#define CHIP_LOG_NUM_MSHIMS() 2
+
+/** Are the bases of the interrupt vector areas fixed? */
+#define CHIP_HAS_FIXED_INTVEC_BASE() 1
+
+/** Are the interrupt masks split up into 2 SPRs? */
+#define CHIP_HAS_SPLIT_INTR_MASK() 1
+
+/** Is the cycle count split up into 2 SPRs? */
+#define CHIP_HAS_SPLIT_CYCLE() 1
+
+/** Does the chip have a static network? */
+#define CHIP_HAS_SN() 1
+
+/** Does the chip have a static network processor? */
+#define CHIP_HAS_SN_PROC() 1
+
+/** Size of the L1 static network processor instruction cache, in bytes. */
+#define CHIP_L1SNI_CACHE_SIZE() 2048
+
+/** Does the chip have DMA support in each tile? */
+#define CHIP_HAS_TILE_DMA() 1
+
+/** Does the chip have the second revision of the directly accessible
+ *  dynamic networks?  This encapsulates a number of characteristics,
+ *  including the absence of the catch-all, the absence of inline message
+ *  tags, the absence of support for network context-switching, and so on.
+ */
+#define CHIP_HAS_REV1_XDN() 0
+
+/** Does the chip have cmpexch and similar (fetchadd, exch, etc.)? */
+#define CHIP_HAS_CMPEXCH() 0
+
+/** Does the chip have memory-mapped I/O support? */
+#define CHIP_HAS_MMIO() 0
+
+/** Does the chip have post-completion interrupts? */
+#define CHIP_HAS_POST_COMPLETION_INTERRUPTS() 0
+
+/** Does the chip have native single step support? */
+#define CHIP_HAS_SINGLE_STEP() 0
+
+#ifndef __OPEN_SOURCE__  /* features only relevant to hypervisor-level code */
+
+/** How many entries are present in the instruction TLB? */
+#define CHIP_ITLB_ENTRIES() 8
+
+/** How many entries are present in the data TLB? */
+#define CHIP_DTLB_ENTRIES() 16
+
+/** How many MAF entries does the XAUI shim have? */
+#define CHIP_XAUI_MAF_ENTRIES() 16
+
+/** Does the memory shim have a source-id table? */
+#define CHIP_HAS_MSHIM_SRCID_TABLE() 1
+
+/** Does the L1 instruction cache clear on reset? */
+#define CHIP_HAS_L1I_CLEAR_ON_RESET() 0
+
+/** Does the chip come out of reset with valid coordinates on all tiles?
+ * Note that if defined, this also implies that the upper left is 1,1.
+ */
+#define CHIP_HAS_VALID_TILE_COORD_RESET() 0
+
+/** Does the chip have unified packet formats? */
+#define CHIP_HAS_UNIFIED_PACKET_FORMATS() 0
+
+/** Does the chip support write reordering? */
+#define CHIP_HAS_WRITE_REORDERING() 0
+
+/** Does the chip support Y-X routing as well as X-Y? */
+#define CHIP_HAS_Y_X_ROUTING() 0
+
+/** Is INTCTRL_3 managed with the correct MPL? */
+#define CHIP_HAS_INTCTRL_3_STATUS_FIX() 0
+
+/** Is it possible to configure the chip to be big-endian? */
+#define CHIP_HAS_BIG_ENDIAN_CONFIG() 0
+
+/** Is the CACHE_RED_WAY_OVERRIDDEN SPR supported? */
+#define CHIP_HAS_CACHE_RED_WAY_OVERRIDDEN() 0
+
+/** Is the DIAG_TRACE_WAY SPR supported? */
+#define CHIP_HAS_DIAG_TRACE_WAY() 0
+
+/** Is the MEM_STRIPE_CONFIG SPR supported? */
+#define CHIP_HAS_MEM_STRIPE_CONFIG() 0
+
+/** Are the TLB_PERF SPRs supported? */
+#define CHIP_HAS_TLB_PERF() 0
+
+/** Is the VDN_SNOOP_SHIM_CTL SPR supported? */
+#define CHIP_HAS_VDN_SNOOP_SHIM_CTL() 0
+
+/** Does the chip support rev1 DMA packets? */
+#define CHIP_HAS_REV1_DMA_PACKETS() 0
+
+#endif /* !__OPEN_SOURCE__ */
+#endif /* __ARCH_CHIP_H__ */
diff --git a/arch/tile/include/arch/chip_tilepro.h b/arch/tile/include/arch/chip_tilepro.h
new file mode 100644
index 0000000..9852af1
--- /dev/null
+++ b/arch/tile/include/arch/chip_tilepro.h
@@ -0,0 +1,252 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+/*
+ * @file
+ * Global header file.
+ * This header file specifies defines for TILEPro.
+ */
+
+#ifndef __ARCH_CHIP_H__
+#define __ARCH_CHIP_H__
+
+/** Specify chip version.
+ * When possible, prefer the CHIP_xxx symbols below for future-proofing.
+ * This is intended for cross-compiling; native compilation should
+ * use the predefined __tile_chip__ symbol.
+ */
+#define TILE_CHIP 1
+
+/** Specify chip revision.
+ * This provides for the case of a respin of a particular chip type;
+ * the normal value for this symbol is "0".
+ * This is intended for cross-compiling; native compilation should
+ * use the predefined __tile_chip_rev__ symbol.
+ */
+#define TILE_CHIP_REV 0
+
+/** The name of this architecture. */
+#define CHIP_ARCH_NAME "tilepro"
+
+/** The ELF e_machine type for binaries for this chip. */
+#define CHIP_ELF_TYPE() EM_TILEPRO
+
+/** The alternate ELF e_machine type for binaries for this chip. */
+#define CHIP_COMPAT_ELF_TYPE() 0x2507
+
+/** What is the native word size of the machine? */
+#define CHIP_WORD_SIZE() 32
+
+/** How many bits of a virtual address are used. Extra bits must be
+ * the sign extension of the low bits.
+ */
+#define CHIP_VA_WIDTH() 32
+
+/** How many bits are in a physical address? */
+#define CHIP_PA_WIDTH() 36
+
+/** Size of the L2 cache, in bytes. */
+#define CHIP_L2_CACHE_SIZE() 65536
+
+/** Log size of an L2 cache line in bytes. */
+#define CHIP_L2_LOG_LINE_SIZE() 6
+
+/** Size of an L2 cache line, in bytes. */
+#define CHIP_L2_LINE_SIZE() (1 << CHIP_L2_LOG_LINE_SIZE())
+
+/** Associativity of the L2 cache. */
+#define CHIP_L2_ASSOC() 4
+
+/** Size of the L1 data cache, in bytes. */
+#define CHIP_L1D_CACHE_SIZE() 8192
+
+/** Log size of an L1 data cache line in bytes. */
+#define CHIP_L1D_LOG_LINE_SIZE() 4
+
+/** Size of an L1 data cache line, in bytes. */
+#define CHIP_L1D_LINE_SIZE() (1 << CHIP_L1D_LOG_LINE_SIZE())
+
+/** Associativity of the L1 data cache. */
+#define CHIP_L1D_ASSOC() 2
+
+/** Size of the L1 instruction cache, in bytes. */
+#define CHIP_L1I_CACHE_SIZE() 16384
+
+/** Log size of an L1 instruction cache line in bytes. */
+#define CHIP_L1I_LOG_LINE_SIZE() 6
+
+/** Size of an L1 instruction cache line, in bytes. */
+#define CHIP_L1I_LINE_SIZE() (1 << CHIP_L1I_LOG_LINE_SIZE())
+
+/** Associativity of the L1 instruction cache. */
+#define CHIP_L1I_ASSOC() 1
+
+/** Stride with which flush instructions must be issued. */
+#define CHIP_FLUSH_STRIDE() CHIP_L2_LINE_SIZE()
+
+/** Stride with which inv instructions must be issued. */
+#define CHIP_INV_STRIDE() CHIP_L2_LINE_SIZE()
+
+/** Stride with which finv instructions must be issued. */
+#define CHIP_FINV_STRIDE() CHIP_L2_LINE_SIZE()
+
+/** Can the local cache coherently cache data that is homed elsewhere? */
+#define CHIP_HAS_COHERENT_LOCAL_CACHE() 1
+
+/** How many simultaneous outstanding victims can the L2 cache have? */
+#define CHIP_MAX_OUTSTANDING_VICTIMS() 4
+
+/** Does the TLB support the NC and NOALLOC bits? */
+#define CHIP_HAS_NC_AND_NOALLOC_BITS() 1
+
+/** Does the chip support hash-for-home caching? */
+#define CHIP_HAS_CBOX_HOME_MAP() 1
+
+/** Number of entries in the chip's home map tables. */
+#define CHIP_CBOX_HOME_MAP_SIZE() 64
+
+/** Do uncacheable requests miss in the cache regardless of whether
+ * there is matching data? */
+#define CHIP_HAS_ENFORCED_UNCACHEABLE_REQUESTS() 1
+
+/** Does the mf instruction wait for victims? */
+#define CHIP_HAS_MF_WAITS_FOR_VICTIMS() 0
+
+/** Does the chip have an "inv" instruction that doesn't also flush? */
+#define CHIP_HAS_INV() 1
+
+/** Does the chip have a "wh64" instruction? */
+#define CHIP_HAS_WH64() 1
+
+/** Does this chip have a 'dword_align' instruction? */
+#define CHIP_HAS_DWORD_ALIGN() 1
+
+/** Number of performance counters. */
+#define CHIP_PERFORMANCE_COUNTERS() 4
+
+/** Does this chip have auxiliary performance counters? */
+#define CHIP_HAS_AUX_PERF_COUNTERS() 1
+
+/** Is the CBOX_MSR1 SPR supported? */
+#define CHIP_HAS_CBOX_MSR1() 1
+
+/** Is the TILE_RTF_HWM SPR supported? */
+#define CHIP_HAS_TILE_RTF_HWM() 1
+
+/** Is the TILE_WRITE_PENDING SPR supported? */
+#define CHIP_HAS_TILE_WRITE_PENDING() 1
+
+/** Is the PROC_STATUS SPR supported? */
+#define CHIP_HAS_PROC_STATUS_SPR() 1
+
+/** Log of the number of mshims we have. */
+#define CHIP_LOG_NUM_MSHIMS() 2
+
+/** Are the bases of the interrupt vector areas fixed? */
+#define CHIP_HAS_FIXED_INTVEC_BASE() 1
+
+/** Are the interrupt masks split up into 2 SPRs? */
+#define CHIP_HAS_SPLIT_INTR_MASK() 1
+
+/** Is the cycle count split up into 2 SPRs? */
+#define CHIP_HAS_SPLIT_CYCLE() 1
+
+/** Does the chip have a static network? */
+#define CHIP_HAS_SN() 1
+
+/** Does the chip have a static network processor? */
+#define CHIP_HAS_SN_PROC() 0
+
+/** Size of the L1 static network processor instruction cache, in bytes. */
+/* #define CHIP_L1SNI_CACHE_SIZE() -- does not apply to chip 1 */
+
+/** Does the chip have DMA support in each tile? */
+#define CHIP_HAS_TILE_DMA() 1
+
+/** Does the chip have the second revision of the directly accessible
+ *  dynamic networks?  This encapsulates a number of characteristics,
+ *  including the absence of the catch-all, the absence of inline message
+ *  tags, the absence of support for network context-switching, and so on.
+ */
+#define CHIP_HAS_REV1_XDN() 0
+
+/** Does the chip have cmpexch and similar (fetchadd, exch, etc.)? */
+#define CHIP_HAS_CMPEXCH() 0
+
+/** Does the chip have memory-mapped I/O support? */
+#define CHIP_HAS_MMIO() 0
+
+/** Does the chip have post-completion interrupts? */
+#define CHIP_HAS_POST_COMPLETION_INTERRUPTS() 0
+
+/** Does the chip have native single step support? */
+#define CHIP_HAS_SINGLE_STEP() 0
+
+#ifndef __OPEN_SOURCE__  /* features only relevant to hypervisor-level code */
+
+/** How many entries are present in the instruction TLB? */
+#define CHIP_ITLB_ENTRIES() 16
+
+/** How many entries are present in the data TLB? */
+#define CHIP_DTLB_ENTRIES() 16
+
+/** How many MAF entries does the XAUI shim have? */
+#define CHIP_XAUI_MAF_ENTRIES() 32
+
+/** Does the memory shim have a source-id table? */
+#define CHIP_HAS_MSHIM_SRCID_TABLE() 0
+
+/** Does the L1 instruction cache clear on reset? */
+#define CHIP_HAS_L1I_CLEAR_ON_RESET() 1
+
+/** Does the chip come out of reset with valid coordinates on all tiles?
+ * Note that if defined, this also implies that the upper left is 1,1.
+ */
+#define CHIP_HAS_VALID_TILE_COORD_RESET() 1
+
+/** Does the chip have unified packet formats? */
+#define CHIP_HAS_UNIFIED_PACKET_FORMATS() 1
+
+/** Does the chip support write reordering? */
+#define CHIP_HAS_WRITE_REORDERING() 1
+
+/** Does the chip support Y-X routing as well as X-Y? */
+#define CHIP_HAS_Y_X_ROUTING() 1
+
+/** Is INTCTRL_3 managed with the correct MPL? */
+#define CHIP_HAS_INTCTRL_3_STATUS_FIX() 1
+
+/** Is it possible to configure the chip to be big-endian? */
+#define CHIP_HAS_BIG_ENDIAN_CONFIG() 1
+
+/** Is the CACHE_RED_WAY_OVERRIDDEN SPR supported? */
+#define CHIP_HAS_CACHE_RED_WAY_OVERRIDDEN() 1
+
+/** Is the DIAG_TRACE_WAY SPR supported? */
+#define CHIP_HAS_DIAG_TRACE_WAY() 1
+
+/** Is the MEM_STRIPE_CONFIG SPR supported? */
+#define CHIP_HAS_MEM_STRIPE_CONFIG() 1
+
+/** Are the TLB_PERF SPRs supported? */
+#define CHIP_HAS_TLB_PERF() 1
+
+/** Is the VDN_SNOOP_SHIM_CTL SPR supported? */
+#define CHIP_HAS_VDN_SNOOP_SHIM_CTL() 1
+
+/** Does the chip support rev1 DMA packets? */
+#define CHIP_HAS_REV1_DMA_PACKETS() 1
+
+#endif /* !__OPEN_SOURCE__ */
+#endif /* __ARCH_CHIP_H__ */
diff --git a/arch/tile/include/arch/interrupts.h b/arch/tile/include/arch/interrupts.h
new file mode 100644
index 0000000..20f8f07
--- /dev/null
+++ b/arch/tile/include/arch/interrupts.h
@@ -0,0 +1,19 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifdef __tilegx__
+#include <arch/interrupts_64.h>
+#else
+#include <arch/interrupts_32.h>
+#endif
diff --git a/arch/tile/include/arch/interrupts_32.h b/arch/tile/include/arch/interrupts_32.h
new file mode 100644
index 0000000..feffada
--- /dev/null
+++ b/arch/tile/include/arch/interrupts_32.h
@@ -0,0 +1,304 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef __ARCH_INTERRUPTS_H__
+#define __ARCH_INTERRUPTS_H__
+
+/** Mask for an interrupt. */
+#ifdef __ASSEMBLER__
+/* Note: must handle breaking interrupts into high and low words manually. */
+#define INT_MASK(intno) (1 << (intno))
+#else
+#define INT_MASK(intno) (1ULL << (intno))
+#endif
+
+
+/** Where a given interrupt executes */
+#define INTERRUPT_VECTOR(i, pl) (0xFC000000 + ((pl) << 24) + ((i) << 8))
+
+/** Where to store a vector for a given interrupt. */
+#define USER_INTERRUPT_VECTOR(i) INTERRUPT_VECTOR(i, 0)
+
+/** The base address of user-level interrupts. */
+#define USER_INTERRUPT_VECTOR_BASE INTERRUPT_VECTOR(0, 0)
+
+
+/** Additional synthetic interrupt. */
+#define INT_BREAKPOINT (63)
+
+#define INT_ITLB_MISS    0
+#define INT_MEM_ERROR    1
+#define INT_ILL    2
+#define INT_GPV    3
+#define INT_SN_ACCESS    4
+#define INT_IDN_ACCESS    5
+#define INT_UDN_ACCESS    6
+#define INT_IDN_REFILL    7
+#define INT_UDN_REFILL    8
+#define INT_IDN_COMPLETE    9
+#define INT_UDN_COMPLETE   10
+#define INT_SWINT_3   11
+#define INT_SWINT_2   12
+#define INT_SWINT_1   13
+#define INT_SWINT_0   14
+#define INT_UNALIGN_DATA   15
+#define INT_DTLB_MISS   16
+#define INT_DTLB_ACCESS   17
+#define INT_DMATLB_MISS   18
+#define INT_DMATLB_ACCESS   19
+#define INT_SNITLB_MISS   20
+#define INT_SN_NOTIFY   21
+#define INT_SN_FIREWALL   22
+#define INT_IDN_FIREWALL   23
+#define INT_UDN_FIREWALL   24
+#define INT_TILE_TIMER   25
+#define INT_IDN_TIMER   26
+#define INT_UDN_TIMER   27
+#define INT_DMA_NOTIFY   28
+#define INT_IDN_CA   29
+#define INT_UDN_CA   30
+#define INT_IDN_AVAIL   31
+#define INT_UDN_AVAIL   32
+#define INT_PERF_COUNT   33
+#define INT_INTCTRL_3   34
+#define INT_INTCTRL_2   35
+#define INT_INTCTRL_1   36
+#define INT_INTCTRL_0   37
+#define INT_BOOT_ACCESS   38
+#define INT_WORLD_ACCESS   39
+#define INT_I_ASID   40
+#define INT_D_ASID   41
+#define INT_DMA_ASID   42
+#define INT_SNI_ASID   43
+#define INT_DMA_CPL   44
+#define INT_SN_CPL   45
+#define INT_DOUBLE_FAULT   46
+#define INT_SN_STATIC_ACCESS   47
+#define INT_AUX_PERF_COUNT   48
+
+#define NUM_INTERRUPTS 49
+
+#define QUEUED_INTERRUPTS ( \
+    INT_MASK(INT_MEM_ERROR) | \
+    INT_MASK(INT_DMATLB_MISS) | \
+    INT_MASK(INT_DMATLB_ACCESS) | \
+    INT_MASK(INT_SNITLB_MISS) | \
+    INT_MASK(INT_SN_NOTIFY) | \
+    INT_MASK(INT_SN_FIREWALL) | \
+    INT_MASK(INT_IDN_FIREWALL) | \
+    INT_MASK(INT_UDN_FIREWALL) | \
+    INT_MASK(INT_TILE_TIMER) | \
+    INT_MASK(INT_IDN_TIMER) | \
+    INT_MASK(INT_UDN_TIMER) | \
+    INT_MASK(INT_DMA_NOTIFY) | \
+    INT_MASK(INT_IDN_CA) | \
+    INT_MASK(INT_UDN_CA) | \
+    INT_MASK(INT_IDN_AVAIL) | \
+    INT_MASK(INT_UDN_AVAIL) | \
+    INT_MASK(INT_PERF_COUNT) | \
+    INT_MASK(INT_INTCTRL_3) | \
+    INT_MASK(INT_INTCTRL_2) | \
+    INT_MASK(INT_INTCTRL_1) | \
+    INT_MASK(INT_INTCTRL_0) | \
+    INT_MASK(INT_BOOT_ACCESS) | \
+    INT_MASK(INT_WORLD_ACCESS) | \
+    INT_MASK(INT_I_ASID) | \
+    INT_MASK(INT_D_ASID) | \
+    INT_MASK(INT_DMA_ASID) | \
+    INT_MASK(INT_SNI_ASID) | \
+    INT_MASK(INT_DMA_CPL) | \
+    INT_MASK(INT_SN_CPL) | \
+    INT_MASK(INT_DOUBLE_FAULT) | \
+    INT_MASK(INT_AUX_PERF_COUNT) | \
+    0)
+#define NONQUEUED_INTERRUPTS ( \
+    INT_MASK(INT_ITLB_MISS) | \
+    INT_MASK(INT_ILL) | \
+    INT_MASK(INT_GPV) | \
+    INT_MASK(INT_SN_ACCESS) | \
+    INT_MASK(INT_IDN_ACCESS) | \
+    INT_MASK(INT_UDN_ACCESS) | \
+    INT_MASK(INT_IDN_REFILL) | \
+    INT_MASK(INT_UDN_REFILL) | \
+    INT_MASK(INT_IDN_COMPLETE) | \
+    INT_MASK(INT_UDN_COMPLETE) | \
+    INT_MASK(INT_SWINT_3) | \
+    INT_MASK(INT_SWINT_2) | \
+    INT_MASK(INT_SWINT_1) | \
+    INT_MASK(INT_SWINT_0) | \
+    INT_MASK(INT_UNALIGN_DATA) | \
+    INT_MASK(INT_DTLB_MISS) | \
+    INT_MASK(INT_DTLB_ACCESS) | \
+    INT_MASK(INT_SN_STATIC_ACCESS) | \
+    0)
+#define CRITICAL_MASKED_INTERRUPTS ( \
+    INT_MASK(INT_MEM_ERROR) | \
+    INT_MASK(INT_DMATLB_MISS) | \
+    INT_MASK(INT_DMATLB_ACCESS) | \
+    INT_MASK(INT_SNITLB_MISS) | \
+    INT_MASK(INT_SN_NOTIFY) | \
+    INT_MASK(INT_SN_FIREWALL) | \
+    INT_MASK(INT_IDN_FIREWALL) | \
+    INT_MASK(INT_UDN_FIREWALL) | \
+    INT_MASK(INT_TILE_TIMER) | \
+    INT_MASK(INT_IDN_TIMER) | \
+    INT_MASK(INT_UDN_TIMER) | \
+    INT_MASK(INT_DMA_NOTIFY) | \
+    INT_MASK(INT_IDN_CA) | \
+    INT_MASK(INT_UDN_CA) | \
+    INT_MASK(INT_IDN_AVAIL) | \
+    INT_MASK(INT_UDN_AVAIL) | \
+    INT_MASK(INT_PERF_COUNT) | \
+    INT_MASK(INT_INTCTRL_3) | \
+    INT_MASK(INT_INTCTRL_2) | \
+    INT_MASK(INT_INTCTRL_1) | \
+    INT_MASK(INT_INTCTRL_0) | \
+    INT_MASK(INT_AUX_PERF_COUNT) | \
+    0)
+#define CRITICAL_UNMASKED_INTERRUPTS ( \
+    INT_MASK(INT_ITLB_MISS) | \
+    INT_MASK(INT_ILL) | \
+    INT_MASK(INT_GPV) | \
+    INT_MASK(INT_SN_ACCESS) | \
+    INT_MASK(INT_IDN_ACCESS) | \
+    INT_MASK(INT_UDN_ACCESS) | \
+    INT_MASK(INT_IDN_REFILL) | \
+    INT_MASK(INT_UDN_REFILL) | \
+    INT_MASK(INT_IDN_COMPLETE) | \
+    INT_MASK(INT_UDN_COMPLETE) | \
+    INT_MASK(INT_SWINT_3) | \
+    INT_MASK(INT_SWINT_2) | \
+    INT_MASK(INT_SWINT_1) | \
+    INT_MASK(INT_SWINT_0) | \
+    INT_MASK(INT_UNALIGN_DATA) | \
+    INT_MASK(INT_DTLB_MISS) | \
+    INT_MASK(INT_DTLB_ACCESS) | \
+    INT_MASK(INT_BOOT_ACCESS) | \
+    INT_MASK(INT_WORLD_ACCESS) | \
+    INT_MASK(INT_I_ASID) | \
+    INT_MASK(INT_D_ASID) | \
+    INT_MASK(INT_DMA_ASID) | \
+    INT_MASK(INT_SNI_ASID) | \
+    INT_MASK(INT_DMA_CPL) | \
+    INT_MASK(INT_SN_CPL) | \
+    INT_MASK(INT_DOUBLE_FAULT) | \
+    INT_MASK(INT_SN_STATIC_ACCESS) | \
+    0)
+#define MASKABLE_INTERRUPTS ( \
+    INT_MASK(INT_MEM_ERROR) | \
+    INT_MASK(INT_IDN_REFILL) | \
+    INT_MASK(INT_UDN_REFILL) | \
+    INT_MASK(INT_IDN_COMPLETE) | \
+    INT_MASK(INT_UDN_COMPLETE) | \
+    INT_MASK(INT_DMATLB_MISS) | \
+    INT_MASK(INT_DMATLB_ACCESS) | \
+    INT_MASK(INT_SNITLB_MISS) | \
+    INT_MASK(INT_SN_NOTIFY) | \
+    INT_MASK(INT_SN_FIREWALL) | \
+    INT_MASK(INT_IDN_FIREWALL) | \
+    INT_MASK(INT_UDN_FIREWALL) | \
+    INT_MASK(INT_TILE_TIMER) | \
+    INT_MASK(INT_IDN_TIMER) | \
+    INT_MASK(INT_UDN_TIMER) | \
+    INT_MASK(INT_DMA_NOTIFY) | \
+    INT_MASK(INT_IDN_CA) | \
+    INT_MASK(INT_UDN_CA) | \
+    INT_MASK(INT_IDN_AVAIL) | \
+    INT_MASK(INT_UDN_AVAIL) | \
+    INT_MASK(INT_PERF_COUNT) | \
+    INT_MASK(INT_INTCTRL_3) | \
+    INT_MASK(INT_INTCTRL_2) | \
+    INT_MASK(INT_INTCTRL_1) | \
+    INT_MASK(INT_INTCTRL_0) | \
+    INT_MASK(INT_AUX_PERF_COUNT) | \
+    0)
+#define UNMASKABLE_INTERRUPTS ( \
+    INT_MASK(INT_ITLB_MISS) | \
+    INT_MASK(INT_ILL) | \
+    INT_MASK(INT_GPV) | \
+    INT_MASK(INT_SN_ACCESS) | \
+    INT_MASK(INT_IDN_ACCESS) | \
+    INT_MASK(INT_UDN_ACCESS) | \
+    INT_MASK(INT_SWINT_3) | \
+    INT_MASK(INT_SWINT_2) | \
+    INT_MASK(INT_SWINT_1) | \
+    INT_MASK(INT_SWINT_0) | \
+    INT_MASK(INT_UNALIGN_DATA) | \
+    INT_MASK(INT_DTLB_MISS) | \
+    INT_MASK(INT_DTLB_ACCESS) | \
+    INT_MASK(INT_BOOT_ACCESS) | \
+    INT_MASK(INT_WORLD_ACCESS) | \
+    INT_MASK(INT_I_ASID) | \
+    INT_MASK(INT_D_ASID) | \
+    INT_MASK(INT_DMA_ASID) | \
+    INT_MASK(INT_SNI_ASID) | \
+    INT_MASK(INT_DMA_CPL) | \
+    INT_MASK(INT_SN_CPL) | \
+    INT_MASK(INT_DOUBLE_FAULT) | \
+    INT_MASK(INT_SN_STATIC_ACCESS) | \
+    0)
+#define SYNC_INTERRUPTS ( \
+    INT_MASK(INT_ITLB_MISS) | \
+    INT_MASK(INT_ILL) | \
+    INT_MASK(INT_GPV) | \
+    INT_MASK(INT_SN_ACCESS) | \
+    INT_MASK(INT_IDN_ACCESS) | \
+    INT_MASK(INT_UDN_ACCESS) | \
+    INT_MASK(INT_IDN_REFILL) | \
+    INT_MASK(INT_UDN_REFILL) | \
+    INT_MASK(INT_IDN_COMPLETE) | \
+    INT_MASK(INT_UDN_COMPLETE) | \
+    INT_MASK(INT_SWINT_3) | \
+    INT_MASK(INT_SWINT_2) | \
+    INT_MASK(INT_SWINT_1) | \
+    INT_MASK(INT_SWINT_0) | \
+    INT_MASK(INT_UNALIGN_DATA) | \
+    INT_MASK(INT_DTLB_MISS) | \
+    INT_MASK(INT_DTLB_ACCESS) | \
+    INT_MASK(INT_SN_STATIC_ACCESS) | \
+    0)
+#define NON_SYNC_INTERRUPTS ( \
+    INT_MASK(INT_MEM_ERROR) | \
+    INT_MASK(INT_DMATLB_MISS) | \
+    INT_MASK(INT_DMATLB_ACCESS) | \
+    INT_MASK(INT_SNITLB_MISS) | \
+    INT_MASK(INT_SN_NOTIFY) | \
+    INT_MASK(INT_SN_FIREWALL) | \
+    INT_MASK(INT_IDN_FIREWALL) | \
+    INT_MASK(INT_UDN_FIREWALL) | \
+    INT_MASK(INT_TILE_TIMER) | \
+    INT_MASK(INT_IDN_TIMER) | \
+    INT_MASK(INT_UDN_TIMER) | \
+    INT_MASK(INT_DMA_NOTIFY) | \
+    INT_MASK(INT_IDN_CA) | \
+    INT_MASK(INT_UDN_CA) | \
+    INT_MASK(INT_IDN_AVAIL) | \
+    INT_MASK(INT_UDN_AVAIL) | \
+    INT_MASK(INT_PERF_COUNT) | \
+    INT_MASK(INT_INTCTRL_3) | \
+    INT_MASK(INT_INTCTRL_2) | \
+    INT_MASK(INT_INTCTRL_1) | \
+    INT_MASK(INT_INTCTRL_0) | \
+    INT_MASK(INT_BOOT_ACCESS) | \
+    INT_MASK(INT_WORLD_ACCESS) | \
+    INT_MASK(INT_I_ASID) | \
+    INT_MASK(INT_D_ASID) | \
+    INT_MASK(INT_DMA_ASID) | \
+    INT_MASK(INT_SNI_ASID) | \
+    INT_MASK(INT_DMA_CPL) | \
+    INT_MASK(INT_SN_CPL) | \
+    INT_MASK(INT_DOUBLE_FAULT) | \
+    INT_MASK(INT_AUX_PERF_COUNT) | \
+    0)
+#endif // !__ARCH_INTERRUPTS_H__
diff --git a/arch/tile/include/arch/sim_def.h b/arch/tile/include/arch/sim_def.h
new file mode 100644
index 0000000..6418fbd
--- /dev/null
+++ b/arch/tile/include/arch/sim_def.h
@@ -0,0 +1,512 @@
+// Copyright 2010 Tilera Corporation. All Rights Reserved.
+//
+//   This program is free software; you can redistribute it and/or
+//   modify it under the terms of the GNU General Public License
+//   as published by the Free Software Foundation, version 2.
+//
+//   This program is distributed in the hope that it will be useful, but
+//   WITHOUT ANY WARRANTY; without even the implied warranty of
+//   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+//   NON INFRINGEMENT.  See the GNU General Public License for
+//   more details.
+
+//! @file
+//!
+//! Some low-level simulator definitions.
+//!
+
+#ifndef __ARCH_SIM_DEF_H__
+#define __ARCH_SIM_DEF_H__
+
+
+//! Internal: the low bits of the SIM_CONTROL_* SPR values specify
+//! the operation to perform, and the remaining bits are
+//! an operation-specific parameter (often unused).
+//!
+#define _SIM_CONTROL_OPERATOR_BITS 8
+
+
+//== Values which can be written to SPR_SIM_CONTROL.
+
+//! If written to SPR_SIM_CONTROL, stops profiling.
+//!
+#define SIM_CONTROL_PROFILER_DISABLE 0
+
+//! If written to SPR_SIM_CONTROL, starts profiling.
+//!
+#define SIM_CONTROL_PROFILER_ENABLE 1
+
+//! If written to SPR_SIM_CONTROL, clears profiling counters.
+//!
+#define SIM_CONTROL_PROFILER_CLEAR 2
+
+//! If written to SPR_SIM_CONTROL, checkpoints the simulator.
+//!
+#define SIM_CONTROL_CHECKPOINT 3
+
+//! If written to SPR_SIM_CONTROL, combined with a mask (shifted by 8),
+//! sets the tracing mask to the given mask. See "sim_set_tracing()".
+//!
+#define SIM_CONTROL_SET_TRACING 4
+
+//! If written to SPR_SIM_CONTROL, combined with a mask (shifted by 8),
+//! dumps the requested items of machine state to the log.
+//!
+#define SIM_CONTROL_DUMP 5
+
+//! If written to SPR_SIM_CONTROL, clears chip-level profiling counters.
+//!
+#define SIM_CONTROL_PROFILER_CHIP_CLEAR 6
+
+//! If written to SPR_SIM_CONTROL, disables chip-level profiling.
+//!
+#define SIM_CONTROL_PROFILER_CHIP_DISABLE 7
+
+//! If written to SPR_SIM_CONTROL, enables chip-level profiling.
+//!
+#define SIM_CONTROL_PROFILER_CHIP_ENABLE 8
+
+//! If written to SPR_SIM_CONTROL, enables chip-level functional mode
+//!
+#define SIM_CONTROL_ENABLE_FUNCTIONAL 9
+
+//! If written to SPR_SIM_CONTROL, disables chip-level functional mode.
+//!
+#define SIM_CONTROL_DISABLE_FUNCTIONAL 10
+
+//! If written to SPR_SIM_CONTROL, enables chip-level functional mode.
+//! All tiles must perform this write for functional mode to be enabled.
+//! Ignored in naked boot mode unless --functional is specified.
+//! WARNING: Only the hypervisor startup code should use this!
+//!
+#define SIM_CONTROL_ENABLE_FUNCTIONAL_BARRIER 11
+
+//! If written to SPR_SIM_CONTROL, combined with a character (shifted by 8),
+//! writes a string directly to the simulator output.  Written to once for
+//! each character in the string, plus a final NUL.  Instead of NUL,
+//! you can also use "SIM_PUTC_FLUSH_STRING" or "SIM_PUTC_FLUSH_BINARY".
+//!
+// ISSUE: Document the meaning of "newline", and the handling of NUL.
+//
+#define SIM_CONTROL_PUTC 12
+
+//! If written to SPR_SIM_CONTROL, clears the --grind-coherence state for
+//! this core.  This is intended to be used before a loop that will
+//! invalidate the cache by loading new data and evicting all current data.
+//! Generally speaking, this API should only be used by system code.
+//!
+#define SIM_CONTROL_GRINDER_CLEAR 13
+
+//! If written to SPR_SIM_CONTROL, shuts down the simulator.
+//!
+#define SIM_CONTROL_SHUTDOWN 14
+
+//! If written to SPR_SIM_CONTROL, combined with a pid (shifted by 8),
+//! indicates that a fork syscall just created the given process.
+//!
+#define SIM_CONTROL_OS_FORK 15
+
+//! If written to SPR_SIM_CONTROL, combined with a pid (shifted by 8),
+//! indicates that an exit syscall was just executed by the given process.
+//!
+#define SIM_CONTROL_OS_EXIT 16
+
+//! If written to SPR_SIM_CONTROL, combined with a pid (shifted by 8),
+//! indicates that the OS just switched to the given process.
+//!
+#define SIM_CONTROL_OS_SWITCH 17
+
+//! If written to SPR_SIM_CONTROL, combined with a character (shifted by 8),
+//! indicates that an exec syscall was just executed. Written to once for
+//! each character in the executable name, plus a final NUL.
+//!
+#define SIM_CONTROL_OS_EXEC 18
+
+//! If written to SPR_SIM_CONTROL, combined with a character (shifted by 8),
+//! indicates that an interpreter (PT_INTERP) was loaded.  Written to once
+//! for each character in "ADDR:PATH", plus a final NUL, where "ADDR" is a
+//! hex load address starting with "0x", and "PATH" is the executable name.
+//!
+#define SIM_CONTROL_OS_INTERP 19
+
+//! If written to SPR_SIM_CONTROL, combined with a character (shifted by 8),
+//! indicates that a dll was loaded.  Written to once for each character
+//! in "ADDR:PATH", plus a final NUL, where "ADDR" is a hexadecimal load
+//! address starting with "0x", and "PATH" is the executable name.
+//!
+#define SIM_CONTROL_DLOPEN 20
+
+//! If written to SPR_SIM_CONTROL, combined with a character (shifted by 8),
+//! indicates that a dll was unloaded.  Written to once for each character
+//! in "ADDR", plus a final NUL, where "ADDR" is a hexadecimal load
+//! address starting with "0x".
+//!
+#define SIM_CONTROL_DLCLOSE 21
+
+//! If written to SPR_SIM_CONTROL, combined with a flag (shifted by 8),
+//! indicates whether to allow data reads to remotely-cached
+//! dirty cache lines to be cached locally without grinder warnings or
+//! assertions (used by Linux kernel fast memcpy).
+//!
+#define SIM_CONTROL_ALLOW_MULTIPLE_CACHING 22
+
+//! If written to SPR_SIM_CONTROL, enables memory tracing.
+//!
+#define SIM_CONTROL_ENABLE_MEM_LOGGING 23
+
+//! If written to SPR_SIM_CONTROL, disables memory tracing.
+//!
+#define SIM_CONTROL_DISABLE_MEM_LOGGING 24
+
+//! If written to SPR_SIM_CONTROL, changes the shaping parameters of one of
+//! the gbe or xgbe shims. Must specify the shim id, the type, the units, and
+//! the rate, as defined in SIM_SHAPING_SPR_ARG.
+//!
+#define SIM_CONTROL_SHAPING 25
+
+//! If written to SPR_SIM_CONTROL, combined with character (shifted by 8),
+//! requests that a simulator command be executed.  Written to once for each
+//! character in the command, plus a final NUL.
+//!
+#define SIM_CONTROL_COMMAND 26
+
+//! If written to SPR_SIM_CONTROL, indicates that the simulated system
+//! is panicking, to allow debugging via --debug-on-panic.
+//!
+#define SIM_CONTROL_PANIC 27
+
+//! If written to SPR_SIM_CONTROL, triggers a simulator syscall.
+//! See "sim_syscall()" for more info.
+//!
+#define SIM_CONTROL_SYSCALL 32
+
+//! If written to SPR_SIM_CONTROL, combined with a pid (shifted by 8),
+//! provides the pid that subsequent SIM_CONTROL_OS_FORK writes should
+//! use as the pid, rather than the default previous SIM_CONTROL_OS_SWITCH.
+//!
+#define SIM_CONTROL_OS_FORK_PARENT 33
+
+//! If written to SPR_SIM_CONTROL, combined with a mPIPE shim number
+//! (shifted by 8), clears the pending magic data section.  The cleared
+//! pending magic data section and any subsequently appended magic bytes
+//! will only take effect when the classifier blast programmer is run.
+#define SIM_CONTROL_CLEAR_MPIPE_MAGIC_BYTES 34
+
+//! If written to SPR_SIM_CONTROL, combined with a mPIPE shim number
+//! (shifted by 8) and a byte of data (shifted by 16), appends that byte
+//! to the shim's pending magic data section.  The pending magic data
+//! section takes effect when the classifier blast programmer is run.
+#define SIM_CONTROL_APPEND_MPIPE_MAGIC_BYTE 35
+
+//! If written to SPR_SIM_CONTROL, combined with a mPIPE shim number
+//! (shifted by 8), an enable=1/disable=0 bit (shifted by 16), and a
+//! mask of links (shifted by 32), enable or disable the corresponding
+//! mPIPE links.
+#define SIM_CONTROL_ENABLE_MPIPE_LINK_MAGIC_BYTE 36
+
+//== Syscall numbers for use with "sim_syscall()".
+
+//! Syscall number for sim_add_watchpoint().
+//!
+#define SIM_SYSCALL_ADD_WATCHPOINT 2
+
+//! Syscall number for sim_remove_watchpoint().
+//!
+#define SIM_SYSCALL_REMOVE_WATCHPOINT 3
+
+//! Syscall number for sim_query_watchpoint().
+//!
+#define SIM_SYSCALL_QUERY_WATCHPOINT 4
+
+//! Syscall number that asserts that the cache lines whose 64-bit PA
+//! is passed as the second argument to sim_syscall(), and over a
+//! range passed as the third argument, are no longer in cache.
+//! The simulator raises an error if this is not the case.
+//!
+#define SIM_SYSCALL_VALIDATE_LINES_EVICTED 5
+
+
+//== Bit masks which can be shifted by 8, combined with
+//== SIM_CONTROL_SET_TRACING, and written to SPR_SIM_CONTROL.
+
+//! @addtogroup arch_sim
+//! @{
+
+//! Enable --trace-cycle when passed to simulator_set_tracing().
+//!
+#define SIM_TRACE_CYCLES          0x01
+
+//! Enable --trace-router when passed to simulator_set_tracing().
+//!
+#define SIM_TRACE_ROUTER          0x02
+
+//! Enable --trace-register-writes when passed to simulator_set_tracing().
+//!
+#define SIM_TRACE_REGISTER_WRITES 0x04
+
+//! Enable --trace-disasm when passed to simulator_set_tracing().
+//!
+#define SIM_TRACE_DISASM          0x08
+
+//! Enable --trace-stall-info when passed to simulator_set_tracing().
+//!
+#define SIM_TRACE_STALL_INFO      0x10
+
+//! Enable --trace-memory-controller when passed to simulator_set_tracing().
+//!
+#define SIM_TRACE_MEMORY_CONTROLLER 0x20
+
+//! Enable --trace-l2 when passed to simulator_set_tracing().
+//!
+#define SIM_TRACE_L2_CACHE 0x40
+
+//! Enable --trace-lines when passed to simulator_set_tracing().
+//!
+#define SIM_TRACE_LINES 0x80
+
+//! Turn off all tracing when passed to simulator_set_tracing().
+//!
+#define SIM_TRACE_NONE 0
+
+//! Turn on all tracing when passed to simulator_set_tracing().
+//!
+#define SIM_TRACE_ALL (-1)
+
+//! @}
+
+//! Computes the value to write to SPR_SIM_CONTROL to set tracing flags.
+//!
+#define SIM_TRACE_SPR_ARG(mask) \
+  (SIM_CONTROL_SET_TRACING | ((mask) << _SIM_CONTROL_OPERATOR_BITS))
+
+
+//== Bit masks which can be shifted by 8, combined with
+//== SIM_CONTROL_DUMP, and written to SPR_SIM_CONTROL.
+
+//! @addtogroup arch_sim
+//! @{
+
+//! Dump the general-purpose registers.
+//!
+#define SIM_DUMP_REGS          0x001
+
+//! Dump the SPRs.
+//!
+#define SIM_DUMP_SPRS          0x002
+
+//! Dump the ITLB.
+//!
+#define SIM_DUMP_ITLB          0x004
+
+//! Dump the DTLB.
+//!
+#define SIM_DUMP_DTLB          0x008
+
+//! Dump the L1 I-cache.
+//!
+#define SIM_DUMP_L1I           0x010
+
+//! Dump the L1 D-cache.
+//!
+#define SIM_DUMP_L1D           0x020
+
+//! Dump the L2 cache.
+//!
+#define SIM_DUMP_L2            0x040
+
+//! Dump the switch registers.
+//!
+#define SIM_DUMP_SNREGS        0x080
+
+//! Dump the switch ITLB.
+//!
+#define SIM_DUMP_SNITLB        0x100
+
+//! Dump the switch L1 I-cache.
+//!
+#define SIM_DUMP_SNL1I         0x200
+
+//! Dump the current backtrace.
+//!
+#define SIM_DUMP_BACKTRACE     0x400
+
+//! Only dump valid lines in caches.
+//!
+#define SIM_DUMP_VALID_LINES   0x800
+
+//! Dump everything that is dumpable.
+//!
+#define SIM_DUMP_ALL (-1 & ~SIM_DUMP_VALID_LINES)
+
+// @}
+
+//! Computes the value to write to SPR_SIM_CONTROL to dump machine state.
+//!
+#define SIM_DUMP_SPR_ARG(mask) \
+  (SIM_CONTROL_DUMP | ((mask) << _SIM_CONTROL_OPERATOR_BITS))
+
+
+//== Bit masks which can be shifted by 8, combined with
+//== SIM_CONTROL_PROFILER_CHIP_xxx, and written to SPR_SIM_CONTROL.
+
+//! @addtogroup arch_sim
+//! @{
+
+//! Use with with SIM_PROFILER_CHIP_xxx to control the memory controllers.
+//!
+#define SIM_CHIP_MEMCTL        0x001
+
+//! Use with with SIM_PROFILER_CHIP_xxx to control the XAUI interface.
+//!
+#define SIM_CHIP_XAUI          0x002
+
+//! Use with with SIM_PROFILER_CHIP_xxx to control the PCIe interface.
+//!
+#define SIM_CHIP_PCIE          0x004
+
+//! Use with with SIM_PROFILER_CHIP_xxx to control the MPIPE interface.
+//!
+#define SIM_CHIP_MPIPE         0x008
+
+//! Reference all chip devices.
+//!
+#define SIM_CHIP_ALL (-1)
+
+//! @}
+
+//! Computes the value to write to SPR_SIM_CONTROL to clear chip statistics.
+//!
+#define SIM_PROFILER_CHIP_CLEAR_SPR_ARG(mask) \
+  (SIM_CONTROL_PROFILER_CHIP_CLEAR | ((mask) << _SIM_CONTROL_OPERATOR_BITS))
+
+//! Computes the value to write to SPR_SIM_CONTROL to disable chip statistics.
+//!
+#define SIM_PROFILER_CHIP_DISABLE_SPR_ARG(mask) \
+  (SIM_CONTROL_PROFILER_CHIP_DISABLE | ((mask) << _SIM_CONTROL_OPERATOR_BITS))
+
+//! Computes the value to write to SPR_SIM_CONTROL to enable chip statistics.
+//!
+#define SIM_PROFILER_CHIP_ENABLE_SPR_ARG(mask) \
+  (SIM_CONTROL_PROFILER_CHIP_ENABLE | ((mask) << _SIM_CONTROL_OPERATOR_BITS))
+
+
+
+// Shim bitrate controls.
+
+//! The number of bits used to store the shim id.
+//!
+#define SIM_CONTROL_SHAPING_SHIM_ID_BITS 3
+
+//! @addtogroup arch_sim
+//! @{
+
+//! Change the gbe 0 bitrate.
+//!
+#define SIM_CONTROL_SHAPING_GBE_0 0x0
+
+//! Change the gbe 1 bitrate.
+//!
+#define SIM_CONTROL_SHAPING_GBE_1 0x1
+
+//! Change the gbe 2 bitrate.
+//!
+#define SIM_CONTROL_SHAPING_GBE_2 0x2
+
+//! Change the gbe 3 bitrate.
+//!
+#define SIM_CONTROL_SHAPING_GBE_3 0x3
+
+//! Change the xgbe 0 bitrate.
+//!
+#define SIM_CONTROL_SHAPING_XGBE_0 0x4
+
+//! Change the xgbe 1 bitrate.
+//!
+#define SIM_CONTROL_SHAPING_XGBE_1 0x5
+
+//! The type of shaping to do.
+//!
+#define SIM_CONTROL_SHAPING_TYPE_BITS 2
+
+//! Control the multiplier.
+//!
+#define SIM_CONTROL_SHAPING_MULTIPLIER 0
+
+//! Control the PPS.
+//!
+#define SIM_CONTROL_SHAPING_PPS 1
+
+//! Control the BPS.
+//!
+#define SIM_CONTROL_SHAPING_BPS 2
+
+//! The number of bits for the units for the shaping parameter.
+//!
+#define SIM_CONTROL_SHAPING_UNITS_BITS 2
+
+//! Provide a number in single units.
+//!
+#define SIM_CONTROL_SHAPING_UNITS_SINGLE 0
+
+//! Provide a number in kilo units.
+//!
+#define SIM_CONTROL_SHAPING_UNITS_KILO 1
+
+//! Provide a number in mega units.
+//!
+#define SIM_CONTROL_SHAPING_UNITS_MEGA 2
+
+//! Provide a number in giga units.
+//!
+#define SIM_CONTROL_SHAPING_UNITS_GIGA 3
+
+// @}
+
+//! How many bits are available for the rate.
+//!
+#define SIM_CONTROL_SHAPING_RATE_BITS \
+  (32 - (_SIM_CONTROL_OPERATOR_BITS + \
+         SIM_CONTROL_SHAPING_SHIM_ID_BITS + \
+         SIM_CONTROL_SHAPING_TYPE_BITS + \
+         SIM_CONTROL_SHAPING_UNITS_BITS))
+
+//! Computes the value to write to SPR_SIM_CONTROL to change a bitrate.
+//!
+#define SIM_SHAPING_SPR_ARG(shim, type, units, rate) \
+  (SIM_CONTROL_SHAPING | \
+   ((shim) | \
+   ((type) << (SIM_CONTROL_SHAPING_SHIM_ID_BITS)) | \
+   ((units) << (SIM_CONTROL_SHAPING_SHIM_ID_BITS + \
+                SIM_CONTROL_SHAPING_TYPE_BITS)) | \
+   ((rate) << (SIM_CONTROL_SHAPING_SHIM_ID_BITS + \
+               SIM_CONTROL_SHAPING_TYPE_BITS + \
+               SIM_CONTROL_SHAPING_UNITS_BITS))) << _SIM_CONTROL_OPERATOR_BITS)
+
+
+//== Values returned when reading SPR_SIM_CONTROL.
+// ISSUE: These names should share a longer common prefix.
+
+//! When reading SPR_SIM_CONTROL, the mask of simulator tracing bits
+//! (SIM_TRACE_xxx values).
+//!
+#define SIM_TRACE_FLAG_MASK 0xFFFF
+
+//! When reading SPR_SIM_CONTROL, the mask for whether profiling is enabled.
+//!
+#define SIM_PROFILER_ENABLED_MASK 0x10000
+
+
+//== Special arguments for "SIM_CONTROL_PUTC".
+
+//! Flag value for forcing a PUTC string-flush, including
+//! coordinate/cycle prefix and newline.
+//!
+#define SIM_PUTC_FLUSH_STRING 0x100
+
+//! Flag value for forcing a PUTC binary-data-flush, which skips the
+//! prefix and does not append a newline.
+//!
+#define SIM_PUTC_FLUSH_BINARY 0x101
+
+
+#endif //__ARCH_SIM_DEF_H__
diff --git a/arch/tile/include/arch/spr_def.h b/arch/tile/include/arch/spr_def.h
new file mode 100644
index 0000000..c8fdbd9
--- /dev/null
+++ b/arch/tile/include/arch/spr_def.h
@@ -0,0 +1,19 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifdef __tilegx__
+#include <arch/spr_def_64.h>
+#else
+#include <arch/spr_def_32.h>
+#endif
diff --git a/arch/tile/include/arch/spr_def_32.h b/arch/tile/include/arch/spr_def_32.h
new file mode 100644
index 0000000..b4fc068
--- /dev/null
+++ b/arch/tile/include/arch/spr_def_32.h
@@ -0,0 +1,162 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef __DOXYGEN__
+
+#ifndef __ARCH_SPR_DEF_H__
+#define __ARCH_SPR_DEF_H__
+
+#define SPR_AUX_PERF_COUNT_0 0x6005
+#define SPR_AUX_PERF_COUNT_1 0x6006
+#define SPR_AUX_PERF_COUNT_CTL 0x6007
+#define SPR_AUX_PERF_COUNT_STS 0x6008
+#define SPR_CYCLE_HIGH 0x4e06
+#define SPR_CYCLE_LOW 0x4e07
+#define SPR_DMA_BYTE 0x3900
+#define SPR_DMA_CHUNK_SIZE 0x3901
+#define SPR_DMA_CTR 0x3902
+#define SPR_DMA_CTR__REQUEST_MASK  0x1
+#define SPR_DMA_CTR__SUSPEND_MASK  0x2
+#define SPR_DMA_DST_ADDR 0x3903
+#define SPR_DMA_DST_CHUNK_ADDR 0x3904
+#define SPR_DMA_SRC_ADDR 0x3905
+#define SPR_DMA_SRC_CHUNK_ADDR 0x3906
+#define SPR_DMA_STATUS__DONE_MASK  0x1
+#define SPR_DMA_STATUS__BUSY_MASK  0x2
+#define SPR_DMA_STATUS__RUNNING_MASK  0x10
+#define SPR_DMA_STRIDE 0x3907
+#define SPR_DMA_USER_STATUS 0x3908
+#define SPR_DONE 0x4e08
+#define SPR_EVENT_BEGIN 0x4e0d
+#define SPR_EVENT_END 0x4e0e
+#define SPR_EX_CONTEXT_0_0 0x4a05
+#define SPR_EX_CONTEXT_0_1 0x4a06
+#define SPR_EX_CONTEXT_0_1__PL_SHIFT 0
+#define SPR_EX_CONTEXT_0_1__PL_RMASK 0x3
+#define SPR_EX_CONTEXT_0_1__PL_MASK  0x3
+#define SPR_EX_CONTEXT_0_1__ICS_SHIFT 2
+#define SPR_EX_CONTEXT_0_1__ICS_RMASK 0x1
+#define SPR_EX_CONTEXT_0_1__ICS_MASK  0x4
+#define SPR_EX_CONTEXT_1_0 0x4805
+#define SPR_EX_CONTEXT_1_1 0x4806
+#define SPR_EX_CONTEXT_1_1__PL_SHIFT 0
+#define SPR_EX_CONTEXT_1_1__PL_RMASK 0x3
+#define SPR_EX_CONTEXT_1_1__PL_MASK  0x3
+#define SPR_EX_CONTEXT_1_1__ICS_SHIFT 2
+#define SPR_EX_CONTEXT_1_1__ICS_RMASK 0x1
+#define SPR_EX_CONTEXT_1_1__ICS_MASK  0x4
+#define SPR_FAIL 0x4e09
+#define SPR_INTCTRL_0_STATUS 0x4a07
+#define SPR_INTCTRL_1_STATUS 0x4807
+#define SPR_INTERRUPT_CRITICAL_SECTION 0x4e0a
+#define SPR_INTERRUPT_MASK_0_0 0x4a08
+#define SPR_INTERRUPT_MASK_0_1 0x4a09
+#define SPR_INTERRUPT_MASK_1_0 0x4809
+#define SPR_INTERRUPT_MASK_1_1 0x480a
+#define SPR_INTERRUPT_MASK_RESET_0_0 0x4a0a
+#define SPR_INTERRUPT_MASK_RESET_0_1 0x4a0b
+#define SPR_INTERRUPT_MASK_RESET_1_0 0x480b
+#define SPR_INTERRUPT_MASK_RESET_1_1 0x480c
+#define SPR_INTERRUPT_MASK_SET_0_0 0x4a0c
+#define SPR_INTERRUPT_MASK_SET_0_1 0x4a0d
+#define SPR_INTERRUPT_MASK_SET_1_0 0x480d
+#define SPR_INTERRUPT_MASK_SET_1_1 0x480e
+#define SPR_MPL_DMA_CPL_SET_0 0x5800
+#define SPR_MPL_DMA_CPL_SET_1 0x5801
+#define SPR_MPL_DMA_NOTIFY_SET_0 0x3800
+#define SPR_MPL_DMA_NOTIFY_SET_1 0x3801
+#define SPR_MPL_INTCTRL_0_SET_0 0x4a00
+#define SPR_MPL_INTCTRL_0_SET_1 0x4a01
+#define SPR_MPL_INTCTRL_1_SET_0 0x4800
+#define SPR_MPL_INTCTRL_1_SET_1 0x4801
+#define SPR_MPL_SN_ACCESS_SET_0 0x0800
+#define SPR_MPL_SN_ACCESS_SET_1 0x0801
+#define SPR_MPL_SN_CPL_SET_0 0x5a00
+#define SPR_MPL_SN_CPL_SET_1 0x5a01
+#define SPR_MPL_SN_FIREWALL_SET_0 0x2c00
+#define SPR_MPL_SN_FIREWALL_SET_1 0x2c01
+#define SPR_MPL_SN_NOTIFY_SET_0 0x2a00
+#define SPR_MPL_SN_NOTIFY_SET_1 0x2a01
+#define SPR_MPL_UDN_ACCESS_SET_0 0x0c00
+#define SPR_MPL_UDN_ACCESS_SET_1 0x0c01
+#define SPR_MPL_UDN_AVAIL_SET_0 0x4000
+#define SPR_MPL_UDN_AVAIL_SET_1 0x4001
+#define SPR_MPL_UDN_CA_SET_0 0x3c00
+#define SPR_MPL_UDN_CA_SET_1 0x3c01
+#define SPR_MPL_UDN_COMPLETE_SET_0 0x1400
+#define SPR_MPL_UDN_COMPLETE_SET_1 0x1401
+#define SPR_MPL_UDN_FIREWALL_SET_0 0x3000
+#define SPR_MPL_UDN_FIREWALL_SET_1 0x3001
+#define SPR_MPL_UDN_REFILL_SET_0 0x1000
+#define SPR_MPL_UDN_REFILL_SET_1 0x1001
+#define SPR_MPL_UDN_TIMER_SET_0 0x3600
+#define SPR_MPL_UDN_TIMER_SET_1 0x3601
+#define SPR_MPL_WORLD_ACCESS_SET_0 0x4e00
+#define SPR_MPL_WORLD_ACCESS_SET_1 0x4e01
+#define SPR_PASS 0x4e0b
+#define SPR_PERF_COUNT_0 0x4205
+#define SPR_PERF_COUNT_1 0x4206
+#define SPR_PERF_COUNT_CTL 0x4207
+#define SPR_PERF_COUNT_STS 0x4208
+#define SPR_PROC_STATUS 0x4f00
+#define SPR_SIM_CONTROL 0x4e0c
+#define SPR_SNCTL 0x0805
+#define SPR_SNCTL__FRZFABRIC_MASK  0x1
+#define SPR_SNCTL__FRZPROC_MASK  0x2
+#define SPR_SNPC 0x080b
+#define SPR_SNSTATIC 0x080c
+#define SPR_SYSTEM_SAVE_0_0 0x4b00
+#define SPR_SYSTEM_SAVE_0_1 0x4b01
+#define SPR_SYSTEM_SAVE_0_2 0x4b02
+#define SPR_SYSTEM_SAVE_0_3 0x4b03
+#define SPR_SYSTEM_SAVE_1_0 0x4900
+#define SPR_SYSTEM_SAVE_1_1 0x4901
+#define SPR_SYSTEM_SAVE_1_2 0x4902
+#define SPR_SYSTEM_SAVE_1_3 0x4903
+#define SPR_TILE_COORD 0x4c17
+#define SPR_TILE_RTF_HWM 0x4e10
+#define SPR_TILE_TIMER_CONTROL 0x3205
+#define SPR_TILE_WRITE_PENDING 0x4e0f
+#define SPR_UDN_AVAIL_EN 0x4005
+#define SPR_UDN_CA_DATA 0x0d00
+#define SPR_UDN_DATA_AVAIL 0x0d03
+#define SPR_UDN_DEADLOCK_TIMEOUT 0x3606
+#define SPR_UDN_DEMUX_CA_COUNT 0x0c05
+#define SPR_UDN_DEMUX_COUNT_0 0x0c06
+#define SPR_UDN_DEMUX_COUNT_1 0x0c07
+#define SPR_UDN_DEMUX_COUNT_2 0x0c08
+#define SPR_UDN_DEMUX_COUNT_3 0x0c09
+#define SPR_UDN_DEMUX_CTL 0x0c0a
+#define SPR_UDN_DEMUX_QUEUE_SEL 0x0c0c
+#define SPR_UDN_DEMUX_STATUS 0x0c0d
+#define SPR_UDN_DEMUX_WRITE_FIFO 0x0c0e
+#define SPR_UDN_DIRECTION_PROTECT 0x3005
+#define SPR_UDN_REFILL_EN 0x1005
+#define SPR_UDN_SP_FIFO_DATA 0x0c11
+#define SPR_UDN_SP_FIFO_SEL 0x0c12
+#define SPR_UDN_SP_FREEZE 0x0c13
+#define SPR_UDN_SP_FREEZE__SP_FRZ_MASK  0x1
+#define SPR_UDN_SP_FREEZE__DEMUX_FRZ_MASK  0x2
+#define SPR_UDN_SP_FREEZE__NON_DEST_EXT_MASK  0x4
+#define SPR_UDN_SP_STATE 0x0c14
+#define SPR_UDN_TAG_0 0x0c15
+#define SPR_UDN_TAG_1 0x0c16
+#define SPR_UDN_TAG_2 0x0c17
+#define SPR_UDN_TAG_3 0x0c18
+#define SPR_UDN_TAG_VALID 0x0c19
+#define SPR_UDN_TILE_COORD 0x0c1a
+
+#endif /* !defined(__ARCH_SPR_DEF_H__) */
+
+#endif /* !defined(__DOXYGEN__) */
diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild
new file mode 100644
index 0000000..3b8f55b
--- /dev/null
+++ b/arch/tile/include/asm/Kbuild
@@ -0,0 +1,3 @@
+include include/asm-generic/Kbuild.asm
+
+header-y += ucontext.h
diff --git a/arch/tile/include/asm/asm-offsets.h b/arch/tile/include/asm/asm-offsets.h
new file mode 100644
index 0000000..d370ee3
--- /dev/null
+++ b/arch/tile/include/asm/asm-offsets.h
@@ -0,0 +1 @@
+#include <generated/asm-offsets.h>
diff --git a/arch/tile/include/asm/atomic.h b/arch/tile/include/asm/atomic.h
new file mode 100644
index 0000000..b8c49f9
--- /dev/null
+++ b/arch/tile/include/asm/atomic.h
@@ -0,0 +1,159 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ * Atomic primitives.
+ */
+
+#ifndef _ASM_TILE_ATOMIC_H
+#define _ASM_TILE_ATOMIC_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/compiler.h>
+#include <asm/system.h>
+
+#define ATOMIC_INIT(i)	{ (i) }
+
+/**
+ * atomic_read - read atomic variable
+ * @v: pointer of type atomic_t
+ *
+ * Atomically reads the value of @v.
+ */
+static inline int atomic_read(const atomic_t *v)
+{
+       return v->counter;
+}
+
+/**
+ * atomic_sub_return - subtract integer and return
+ * @v: pointer of type atomic_t
+ * @i: integer value to subtract
+ *
+ * Atomically subtracts @i from @v and returns @v - @i
+ */
+#define atomic_sub_return(i, v)		atomic_add_return((int)(-(i)), (v))
+
+/**
+ * atomic_sub - subtract integer from atomic variable
+ * @i: integer value to subtract
+ * @v: pointer of type atomic_t
+ *
+ * Atomically subtracts @i from @v.
+ */
+#define atomic_sub(i, v)		atomic_add((int)(-(i)), (v))
+
+/**
+ * atomic_sub_and_test - subtract value from variable and test result
+ * @i: integer value to subtract
+ * @v: pointer of type atomic_t
+ *
+ * Atomically subtracts @i from @v and returns true if the result is
+ * zero, or false for all other cases.
+ */
+#define atomic_sub_and_test(i, v)	(atomic_sub_return((i), (v)) == 0)
+
+/**
+ * atomic_inc_return - increment memory and return
+ * @v: pointer of type atomic_t
+ *
+ * Atomically increments @v by 1 and returns the new value.
+ */
+#define atomic_inc_return(v)		atomic_add_return(1, (v))
+
+/**
+ * atomic_dec_return - decrement memory and return
+ * @v: pointer of type atomic_t
+ *
+ * Atomically decrements @v by 1 and returns the new value.
+ */
+#define atomic_dec_return(v)		atomic_sub_return(1, (v))
+
+/**
+ * atomic_inc - increment atomic variable
+ * @v: pointer of type atomic_t
+ *
+ * Atomically increments @v by 1.
+ */
+#define atomic_inc(v)			atomic_add(1, (v))
+
+/**
+ * atomic_dec - decrement atomic variable
+ * @v: pointer of type atomic_t
+ *
+ * Atomically decrements @v by 1.
+ */
+#define atomic_dec(v)			atomic_sub(1, (v))
+
+/**
+ * atomic_dec_and_test - decrement and test
+ * @v: pointer of type atomic_t
+ *
+ * Atomically decrements @v by 1 and returns true if the result is 0.
+ */
+#define atomic_dec_and_test(v)		(atomic_dec_return(v) == 0)
+
+/**
+ * atomic_inc_and_test - increment and test
+ * @v: pointer of type atomic_t
+ *
+ * Atomically increments @v by 1 and returns true if the result is 0.
+ */
+#define atomic_inc_and_test(v)		(atomic_inc_return(v) == 0)
+
+/**
+ * atomic_add_negative - add and test if negative
+ * @v: pointer of type atomic_t
+ * @i: integer value to add
+ *
+ * Atomically adds @i to @v and returns true if the result is
+ * negative, or false when result is greater than or equal to zero.
+ */
+#define atomic_add_negative(i, v)	(atomic_add_return((i), (v)) < 0)
+
+/**
+ * atomic_inc_not_zero - increment unless the number is zero
+ * @v: pointer of type atomic_t
+ *
+ * Atomically increments @v by 1, so long as @v is non-zero.
+ * Returns non-zero if @v was non-zero, and zero otherwise.
+ */
+#define atomic_inc_not_zero(v)		atomic_add_unless((v), 1, 0)
+
+
+/*
+ * We define xchg() and cmpxchg() in the included headers.
+ * Note that we do not define __HAVE_ARCH_CMPXCHG, since that would imply
+ * that cmpxchg() is an efficient operation, which is not particularly true.
+ */
+
+/* Nonexistent functions intended to cause link errors. */
+extern unsigned long __xchg_called_with_bad_pointer(void);
+extern unsigned long __cmpxchg_called_with_bad_pointer(void);
+
+#define tas(ptr) (xchg((ptr), 1))
+
+#endif /* __ASSEMBLY__ */
+
+#ifndef __tilegx__
+#include <asm/atomic_32.h>
+#else
+#include <asm/atomic_64.h>
+#endif
+
+/* Provide the appropriate atomic_long_t definitions. */
+#ifndef __ASSEMBLY__
+#include <asm-generic/atomic-long.h>
+#endif
+
+#endif /* _ASM_TILE_ATOMIC_H */
diff --git a/arch/tile/include/asm/atomic_32.h b/arch/tile/include/asm/atomic_32.h
new file mode 100644
index 0000000..e4f8b4f
--- /dev/null
+++ b/arch/tile/include/asm/atomic_32.h
@@ -0,0 +1,353 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ * Do not include directly; use <asm/atomic.h>.
+ */
+
+#ifndef _ASM_TILE_ATOMIC_32_H
+#define _ASM_TILE_ATOMIC_32_H
+
+#include <arch/chip.h>
+
+#ifndef __ASSEMBLY__
+
+/* Tile-specific routines to support <asm/atomic.h>. */
+int _atomic_xchg(atomic_t *v, int n);
+int _atomic_xchg_add(atomic_t *v, int i);
+int _atomic_xchg_add_unless(atomic_t *v, int a, int u);
+int _atomic_cmpxchg(atomic_t *v, int o, int n);
+
+/**
+ * atomic_xchg - atomically exchange contents of memory with a new value
+ * @v: pointer of type atomic_t
+ * @i: integer value to store in memory
+ *
+ * Atomically sets @v to @i and returns old @v
+ */
+static inline int atomic_xchg(atomic_t *v, int n)
+{
+	smp_mb();  /* barrier for proper semantics */
+	return _atomic_xchg(v, n);
+}
+
+/**
+ * atomic_cmpxchg - atomically exchange contents of memory if it matches
+ * @v: pointer of type atomic_t
+ * @o: old value that memory should have
+ * @n: new value to write to memory if it matches
+ *
+ * Atomically checks if @v holds @o and replaces it with @n if so.
+ * Returns the old value at @v.
+ */
+static inline int atomic_cmpxchg(atomic_t *v, int o, int n)
+{
+	smp_mb();  /* barrier for proper semantics */
+	return _atomic_cmpxchg(v, o, n);
+}
+
+/**
+ * atomic_add - add integer to atomic variable
+ * @i: integer value to add
+ * @v: pointer of type atomic_t
+ *
+ * Atomically adds @i to @v.
+ */
+static inline void atomic_add(int i, atomic_t *v)
+{
+	_atomic_xchg_add(v, i);
+}
+
+/**
+ * atomic_add_return - add integer and return
+ * @v: pointer of type atomic_t
+ * @i: integer value to add
+ *
+ * Atomically adds @i to @v and returns @i + @v
+ */
+static inline int atomic_add_return(int i, atomic_t *v)
+{
+	smp_mb();  /* barrier for proper semantics */
+	return _atomic_xchg_add(v, i) + i;
+}
+
+/**
+ * atomic_add_unless - add unless the number is already a given value
+ * @v: pointer of type atomic_t
+ * @a: the amount to add to v...
+ * @u: ...unless v is equal to u.
+ *
+ * Atomically adds @a to @v, so long as @v was not already @u.
+ * Returns non-zero if @v was not @u, and zero otherwise.
+ */
+static inline int atomic_add_unless(atomic_t *v, int a, int u)
+{
+	smp_mb();  /* barrier for proper semantics */
+	return _atomic_xchg_add_unless(v, a, u) != u;
+}
+
+/**
+ * atomic_set - set atomic variable
+ * @v: pointer of type atomic_t
+ * @i: required value
+ *
+ * Atomically sets the value of @v to @i.
+ *
+ * atomic_set() can't be just a raw store, since it would be lost if it
+ * fell between the load and store of one of the other atomic ops.
+ */
+static inline void atomic_set(atomic_t *v, int n)
+{
+	_atomic_xchg(v, n);
+}
+
+#define xchg(ptr, x) ((typeof(*(ptr))) \
+  ((sizeof(*(ptr)) == sizeof(atomic_t)) ? \
+   atomic_xchg((atomic_t *)(ptr), (long)(x)) : \
+   __xchg_called_with_bad_pointer()))
+
+#define cmpxchg(ptr, o, n) ((typeof(*(ptr))) \
+  ((sizeof(*(ptr)) == sizeof(atomic_t)) ? \
+   atomic_cmpxchg((atomic_t *)(ptr), (long)(o), (long)(n)) : \
+   __cmpxchg_called_with_bad_pointer()))
+
+/* A 64bit atomic type */
+
+typedef struct {
+	u64 __aligned(8) counter;
+} atomic64_t;
+
+#define ATOMIC64_INIT(val) { (val) }
+
+u64 _atomic64_xchg(atomic64_t *v, u64 n);
+u64 _atomic64_xchg_add(atomic64_t *v, u64 i);
+u64 _atomic64_xchg_add_unless(atomic64_t *v, u64 a, u64 u);
+u64 _atomic64_cmpxchg(atomic64_t *v, u64 o, u64 n);
+
+/**
+ * atomic64_read - read atomic variable
+ * @v: pointer of type atomic64_t
+ *
+ * Atomically reads the value of @v.
+ */
+static inline u64 atomic64_read(const atomic64_t *v)
+{
+	/*
+	 * Requires an atomic op to read both 32-bit parts consistently.
+	 * Casting away const is safe since the atomic support routines
+	 * do not write to memory if the value has not been modified.
+	 */
+	return _atomic64_xchg_add((atomic64_t *)v, 0);
+}
+
+/**
+ * atomic64_xchg - atomically exchange contents of memory with a new value
+ * @v: pointer of type atomic64_t
+ * @i: integer value to store in memory
+ *
+ * Atomically sets @v to @i and returns old @v
+ */
+static inline u64 atomic64_xchg(atomic64_t *v, u64 n)
+{
+	smp_mb();  /* barrier for proper semantics */
+	return _atomic64_xchg(v, n);
+}
+
+/**
+ * atomic64_cmpxchg - atomically exchange contents of memory if it matches
+ * @v: pointer of type atomic64_t
+ * @o: old value that memory should have
+ * @n: new value to write to memory if it matches
+ *
+ * Atomically checks if @v holds @o and replaces it with @n if so.
+ * Returns the old value at @v.
+ */
+static inline u64 atomic64_cmpxchg(atomic64_t *v, u64 o, u64 n)
+{
+	smp_mb();  /* barrier for proper semantics */
+	return _atomic64_cmpxchg(v, o, n);
+}
+
+/**
+ * atomic64_add - add integer to atomic variable
+ * @i: integer value to add
+ * @v: pointer of type atomic64_t
+ *
+ * Atomically adds @i to @v.
+ */
+static inline void atomic64_add(u64 i, atomic64_t *v)
+{
+	_atomic64_xchg_add(v, i);
+}
+
+/**
+ * atomic64_add_return - add integer and return
+ * @v: pointer of type atomic64_t
+ * @i: integer value to add
+ *
+ * Atomically adds @i to @v and returns @i + @v
+ */
+static inline u64 atomic64_add_return(u64 i, atomic64_t *v)
+{
+	smp_mb();  /* barrier for proper semantics */
+	return _atomic64_xchg_add(v, i) + i;
+}
+
+/**
+ * atomic64_add_unless - add unless the number is already a given value
+ * @v: pointer of type atomic64_t
+ * @a: the amount to add to v...
+ * @u: ...unless v is equal to u.
+ *
+ * Atomically adds @a to @v, so long as @v was not already @u.
+ * Returns non-zero if @v was not @u, and zero otherwise.
+ */
+static inline u64 atomic64_add_unless(atomic64_t *v, u64 a, u64 u)
+{
+	smp_mb();  /* barrier for proper semantics */
+	return _atomic64_xchg_add_unless(v, a, u) != u;
+}
+
+/**
+ * atomic64_set - set atomic variable
+ * @v: pointer of type atomic64_t
+ * @i: required value
+ *
+ * Atomically sets the value of @v to @i.
+ *
+ * atomic64_set() can't be just a raw store, since it would be lost if it
+ * fell between the load and store of one of the other atomic ops.
+ */
+static inline void atomic64_set(atomic64_t *v, u64 n)
+{
+	_atomic64_xchg(v, n);
+}
+
+#define atomic64_add_negative(a, v)	(atomic64_add_return((a), (v)) < 0)
+#define atomic64_inc(v)			atomic64_add(1LL, (v))
+#define atomic64_inc_return(v)		atomic64_add_return(1LL, (v))
+#define atomic64_inc_and_test(v)	(atomic64_inc_return(v) == 0)
+#define atomic64_sub_return(i, v)	atomic64_add_return(-(i), (v))
+#define atomic64_sub_and_test(a, v)	(atomic64_sub_return((a), (v)) == 0)
+#define atomic64_sub(i, v)		atomic64_add(-(i), (v))
+#define atomic64_dec(v)			atomic64_sub(1LL, (v))
+#define atomic64_dec_return(v)		atomic64_sub_return(1LL, (v))
+#define atomic64_dec_and_test(v)	(atomic64_dec_return((v)) == 0)
+#define atomic64_inc_not_zero(v)	atomic64_add_unless((v), 1LL, 0LL)
+
+/*
+ * We need to barrier before modifying the word, since the _atomic_xxx()
+ * routines just tns the lock and then read/modify/write of the word.
+ * But after the word is updated, the routine issues an "mf" before returning,
+ * and since it's a function call, we don't even need a compiler barrier.
+ */
+#define smp_mb__before_atomic_dec()	smp_mb()
+#define smp_mb__before_atomic_inc()	smp_mb()
+#define smp_mb__after_atomic_dec()	do { } while (0)
+#define smp_mb__after_atomic_inc()	do { } while (0)
+
+
+/*
+ * Support "tns" atomic integers.  These are atomic integers that can
+ * hold any value but "1".  They are more efficient than regular atomic
+ * operations because the "lock" (aka acquire) step is a single "tns"
+ * in the uncontended case, and the "unlock" (aka release) step is a
+ * single "store" without an mf.  (However, note that on tilepro the
+ * "tns" will evict the local cache line, so it's not all upside.)
+ *
+ * Note that you can ONLY observe the value stored in the pointer
+ * using these operations; a direct read of the value may confusingly
+ * return the special value "1".
+ */
+
+int __tns_atomic_acquire(atomic_t *);
+void __tns_atomic_release(atomic_t *p, int v);
+
+static inline void tns_atomic_set(atomic_t *v, int i)
+{
+	__tns_atomic_acquire(v);
+	__tns_atomic_release(v, i);
+}
+
+static inline int tns_atomic_cmpxchg(atomic_t *v, int o, int n)
+{
+	int ret = __tns_atomic_acquire(v);
+	__tns_atomic_release(v, (ret == o) ? n : ret);
+	return ret;
+}
+
+static inline int tns_atomic_xchg(atomic_t *v, int n)
+{
+	int ret = __tns_atomic_acquire(v);
+	__tns_atomic_release(v, n);
+	return ret;
+}
+
+#endif /* !__ASSEMBLY__ */
+
+/*
+ * Internal definitions only beyond this point.
+ */
+
+#define ATOMIC_LOCKS_FOUND_VIA_TABLE() \
+  (!CHIP_HAS_CBOX_HOME_MAP() && defined(CONFIG_SMP))
+
+#if ATOMIC_LOCKS_FOUND_VIA_TABLE()
+
+/* Number of entries in atomic_lock_ptr[]. */
+#define ATOMIC_HASH_L1_SHIFT 6
+#define ATOMIC_HASH_L1_SIZE (1 << ATOMIC_HASH_L1_SHIFT)
+
+/* Number of locks in each struct pointed to by atomic_lock_ptr[]. */
+#define ATOMIC_HASH_L2_SHIFT (CHIP_L2_LOG_LINE_SIZE() - 2)
+#define ATOMIC_HASH_L2_SIZE (1 << ATOMIC_HASH_L2_SHIFT)
+
+#else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
+
+/*
+ * Number of atomic locks in atomic_locks[]. Must be a power of two.
+ * There is no reason for more than PAGE_SIZE / 8 entries, since that
+ * is the maximum number of pointer bits we can use to index this.
+ * And we cannot have more than PAGE_SIZE / 4, since this has to
+ * fit on a single page and each entry takes 4 bytes.
+ */
+#define ATOMIC_HASH_SHIFT (PAGE_SHIFT - 3)
+#define ATOMIC_HASH_SIZE (1 << ATOMIC_HASH_SHIFT)
+
+#ifndef __ASSEMBLY__
+extern int atomic_locks[];
+#endif
+
+#endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
+
+/*
+ * All the code that may fault while holding an atomic lock must
+ * place the pointer to the lock in ATOMIC_LOCK_REG so the fault code
+ * can correctly release and reacquire the lock.  Note that we
+ * mention the register number in a comment in "lib/atomic_asm.S" to help
+ * assembly coders from using this register by mistake, so if it
+ * is changed here, change that comment as well.
+ */
+#define ATOMIC_LOCK_REG 20
+#define ATOMIC_LOCK_REG_NAME r20
+
+#ifndef __ASSEMBLY__
+/* Called from setup to initialize a hash table to point to per_cpu locks. */
+void __init_atomic_per_cpu(void);
+
+#ifdef CONFIG_SMP
+/* Support releasing the atomic lock in do_page_fault_ics(). */
+void __atomic_fault_unlock(int *lock_ptr);
+#endif
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _ASM_TILE_ATOMIC_32_H */
diff --git a/arch/tile/include/asm/auxvec.h b/arch/tile/include/asm/auxvec.h
new file mode 100644
index 0000000..1d393ed
--- /dev/null
+++ b/arch/tile/include/asm/auxvec.h
@@ -0,0 +1,20 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_AUXVEC_H
+#define _ASM_TILE_AUXVEC_H
+
+/* No extensions to auxvec */
+
+#endif /* _ASM_TILE_AUXVEC_H */
diff --git a/arch/tile/include/asm/backtrace.h b/arch/tile/include/asm/backtrace.h
new file mode 100644
index 0000000..6970bfc
--- /dev/null
+++ b/arch/tile/include/asm/backtrace.h
@@ -0,0 +1,193 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _TILE_BACKTRACE_H
+#define _TILE_BACKTRACE_H
+
+
+
+#include <linux/types.h>
+
+#include <arch/chip.h>
+
+#if CHIP_VA_WIDTH() > 32
+typedef unsigned long long VirtualAddress;
+#else
+typedef unsigned int VirtualAddress;
+#endif
+
+
+/** Reads 'size' bytes from 'address' and writes the data to 'result'.
+ * Returns true if successful, else false (e.g. memory not readable).
+ */
+typedef bool (*BacktraceMemoryReader)(void *result,
+				      VirtualAddress address,
+				      unsigned int size,
+				      void *extra);
+
+typedef struct {
+	/** Current PC. */
+	VirtualAddress pc;
+
+	/** Current stack pointer value. */
+	VirtualAddress sp;
+
+	/** Current frame pointer value (i.e. caller's stack pointer) */
+	VirtualAddress fp;
+
+	/** Internal use only: caller's PC for first frame. */
+	VirtualAddress initial_frame_caller_pc;
+
+	/** Internal use only: callback to read memory. */
+	BacktraceMemoryReader read_memory_func;
+
+	/** Internal use only: arbitrary argument to read_memory_func. */
+	void *read_memory_func_extra;
+
+} BacktraceIterator;
+
+
+/** Initializes a backtracer to start from the given location.
+ *
+ * If the frame pointer cannot be determined it is set to -1.
+ *
+ * @param state The state to be filled in.
+ * @param read_memory_func A callback that reads memory. If NULL, a default
+ *        value is provided.
+ * @param read_memory_func_extra An arbitrary argument to read_memory_func.
+ * @param pc The current PC.
+ * @param lr The current value of the 'lr' register.
+ * @param sp The current value of the 'sp' register.
+ * @param r52 The current value of the 'r52' register.
+ */
+extern void backtrace_init(BacktraceIterator *state,
+			   BacktraceMemoryReader read_memory_func,
+			   void *read_memory_func_extra,
+			   VirtualAddress pc, VirtualAddress lr,
+			   VirtualAddress sp, VirtualAddress r52);
+
+
+/** Advances the backtracing state to the calling frame, returning
+ * true iff successful.
+ */
+extern bool backtrace_next(BacktraceIterator *state);
+
+
+typedef enum {
+
+	/* We have no idea what the caller's pc is. */
+	PC_LOC_UNKNOWN,
+
+	/* The caller's pc is currently in lr. */
+	PC_LOC_IN_LR,
+
+	/* The caller's pc can be found by dereferencing the caller's sp. */
+	PC_LOC_ON_STACK
+
+} CallerPCLocation;
+
+
+typedef enum {
+
+	/* We have no idea what the caller's sp is. */
+	SP_LOC_UNKNOWN,
+
+	/* The caller's sp is currently in r52. */
+	SP_LOC_IN_R52,
+
+	/* The caller's sp can be found by adding a certain constant
+	 * to the current value of sp.
+	 */
+	SP_LOC_OFFSET
+
+} CallerSPLocation;
+
+
+/* Bit values ORed into CALLER_* values for info ops. */
+enum {
+	/* Setting the low bit on any of these values means the info op
+	 * applies only to one bundle ago.
+	 */
+	ONE_BUNDLE_AGO_FLAG = 1,
+
+	/* Setting this bit on a CALLER_SP_* value means the PC is in LR.
+	 * If not set, PC is on the stack.
+	 */
+	PC_IN_LR_FLAG = 2,
+
+	/* This many of the low bits of a CALLER_SP_* value are for the
+	 * flag bits above.
+	 */
+	NUM_INFO_OP_FLAGS = 2,
+
+	/* We cannot have one in the memory pipe so this is the maximum. */
+	MAX_INFO_OPS_PER_BUNDLE = 2
+};
+
+
+/** Internal constants used to define 'info' operands. */
+enum {
+	/* 0 and 1 are reserved, as are all negative numbers. */
+
+	CALLER_UNKNOWN_BASE = 2,
+
+	CALLER_SP_IN_R52_BASE = 4,
+
+	CALLER_SP_OFFSET_BASE = 8
+};
+
+
+/** Current backtracer state describing where it thinks the caller is. */
+typedef struct {
+	/*
+	 * Public fields
+	 */
+
+	/* How do we find the caller's PC? */
+	CallerPCLocation pc_location : 8;
+
+	/* How do we find the caller's SP? */
+	CallerSPLocation sp_location : 8;
+
+	/* If sp_location == SP_LOC_OFFSET, then caller_sp == sp +
+	 * loc->sp_offset. Else this field is undefined.
+	 */
+	uint16_t sp_offset;
+
+	/* In the most recently visited bundle a terminating bundle? */
+	bool at_terminating_bundle;
+
+	/*
+	 * Private fields
+	 */
+
+	/* Will the forward scanner see someone clobbering sp
+	 * (i.e. changing it with something other than addi sp, sp, N?)
+	 */
+	bool sp_clobber_follows;
+
+	/* Operand to next "visible" info op (no more than one bundle past
+	 * the next terminating bundle), or -32768 if none.
+	 */
+	int16_t next_info_operand;
+
+	/* Is the info of in next_info_op in the very next bundle? */
+	bool is_next_info_operand_adjacent;
+
+} CallerLocation;
+
+
+
+
+#endif /* _TILE_BACKTRACE_H */
diff --git a/arch/tile/include/asm/bitops.h b/arch/tile/include/asm/bitops.h
new file mode 100644
index 0000000..84600f3
--- /dev/null
+++ b/arch/tile/include/asm/bitops.h
@@ -0,0 +1,126 @@
+/*
+ * Copyright 1992, Linus Torvalds.
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_BITOPS_H
+#define _ASM_TILE_BITOPS_H
+
+#include <linux/types.h>
+
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
+#ifdef __tilegx__
+#include <asm/bitops_64.h>
+#else
+#include <asm/bitops_32.h>
+#endif
+
+/**
+ * __ffs - find first set bit in word
+ * @word: The word to search
+ *
+ * Undefined if no set bit exists, so code should check against 0 first.
+ */
+static inline unsigned long __ffs(unsigned long word)
+{
+	return __builtin_ctzl(word);
+}
+
+/**
+ * ffz - find first zero bit in word
+ * @word: The word to search
+ *
+ * Undefined if no zero exists, so code should check against ~0UL first.
+ */
+static inline unsigned long ffz(unsigned long word)
+{
+	return __builtin_ctzl(~word);
+}
+
+/**
+ * __fls - find last set bit in word
+ * @word: The word to search
+ *
+ * Undefined if no set bit exists, so code should check against 0 first.
+ */
+static inline unsigned long __fls(unsigned long word)
+{
+	return (sizeof(word) * 8) - 1 - __builtin_clzl(word);
+}
+
+/**
+ * ffs - find first set bit in word
+ * @x: the word to search
+ *
+ * This is defined the same way as the libc and compiler builtin ffs
+ * routines, therefore differs in spirit from the other bitops.
+ *
+ * ffs(value) returns 0 if value is 0 or the position of the first
+ * set bit if value is nonzero. The first (least significant) bit
+ * is at position 1.
+ */
+static inline int ffs(int x)
+{
+	return __builtin_ffs(x);
+}
+
+/**
+ * fls - find last set bit in word
+ * @x: the word to search
+ *
+ * This is defined in a similar way as the libc and compiler builtin
+ * ffs, but returns the position of the most significant set bit.
+ *
+ * fls(value) returns 0 if value is 0 or the position of the last
+ * set bit if value is nonzero. The last (most significant) bit is
+ * at position 32.
+ */
+static inline int fls(int x)
+{
+	return (sizeof(int) * 8) - __builtin_clz(x);
+}
+
+static inline int fls64(__u64 w)
+{
+	return (sizeof(__u64) * 8) - __builtin_clzll(w);
+}
+
+static inline unsigned int hweight32(unsigned int w)
+{
+	return __builtin_popcount(w);
+}
+
+static inline unsigned int hweight16(unsigned int w)
+{
+	return __builtin_popcount(w & 0xffff);
+}
+
+static inline unsigned int hweight8(unsigned int w)
+{
+	return __builtin_popcount(w & 0xff);
+}
+
+static inline unsigned long hweight64(__u64 w)
+{
+	return __builtin_popcountll(w);
+}
+
+#include <asm-generic/bitops/lock.h>
+#include <asm-generic/bitops/sched.h>
+#include <asm-generic/bitops/ext2-non-atomic.h>
+#include <asm-generic/bitops/minix.h>
+
+#endif /* _ASM_TILE_BITOPS_H */
diff --git a/arch/tile/include/asm/bitops_32.h b/arch/tile/include/asm/bitops_32.h
new file mode 100644
index 0000000..7a93c00
--- /dev/null
+++ b/arch/tile/include/asm/bitops_32.h
@@ -0,0 +1,132 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_BITOPS_32_H
+#define _ASM_TILE_BITOPS_32_H
+
+#include <linux/compiler.h>
+#include <asm/atomic.h>
+#include <asm/system.h>
+
+/* Tile-specific routines to support <asm/bitops.h>. */
+unsigned long _atomic_or(volatile unsigned long *p, unsigned long mask);
+unsigned long _atomic_andn(volatile unsigned long *p, unsigned long mask);
+unsigned long _atomic_xor(volatile unsigned long *p, unsigned long mask);
+
+/**
+ * set_bit - Atomically set a bit in memory
+ * @nr: the bit to set
+ * @addr: the address to start counting from
+ *
+ * This function is atomic and may not be reordered.
+ * See __set_bit() if you do not require the atomic guarantees.
+ * Note that @nr may be almost arbitrarily large; this function is not
+ * restricted to acting on a single-word quantity.
+ */
+static inline void set_bit(unsigned nr, volatile unsigned long *addr)
+{
+	_atomic_or(addr + BIT_WORD(nr), BIT_MASK(nr));
+}
+
+/**
+ * clear_bit - Clears a bit in memory
+ * @nr: Bit to clear
+ * @addr: Address to start counting from
+ *
+ * clear_bit() is atomic and may not be reordered.
+ * See __clear_bit() if you do not require the atomic guarantees.
+ * Note that @nr may be almost arbitrarily large; this function is not
+ * restricted to acting on a single-word quantity.
+ *
+ * clear_bit() may not contain a memory barrier, so if it is used for
+ * locking purposes, you should call smp_mb__before_clear_bit() and/or
+ * smp_mb__after_clear_bit() to ensure changes are visible on other cpus.
+ */
+static inline void clear_bit(unsigned nr, volatile unsigned long *addr)
+{
+	_atomic_andn(addr + BIT_WORD(nr), BIT_MASK(nr));
+}
+
+/**
+ * change_bit - Toggle a bit in memory
+ * @nr: Bit to change
+ * @addr: Address to start counting from
+ *
+ * change_bit() is atomic and may not be reordered.
+ * See __change_bit() if you do not require the atomic guarantees.
+ * Note that @nr may be almost arbitrarily large; this function is not
+ * restricted to acting on a single-word quantity.
+ */
+static inline void change_bit(unsigned nr, volatile unsigned long *addr)
+{
+	_atomic_xor(addr + BIT_WORD(nr), BIT_MASK(nr));
+}
+
+/**
+ * test_and_set_bit - Set a bit and return its old value
+ * @nr: Bit to set
+ * @addr: Address to count from
+ *
+ * This operation is atomic and cannot be reordered.
+ * It also implies a memory barrier.
+ */
+static inline int test_and_set_bit(unsigned nr, volatile unsigned long *addr)
+{
+	unsigned long mask = BIT_MASK(nr);
+	addr += BIT_WORD(nr);
+	smp_mb();  /* barrier for proper semantics */
+	return (_atomic_or(addr, mask) & mask) != 0;
+}
+
+/**
+ * test_and_clear_bit - Clear a bit and return its old value
+ * @nr: Bit to clear
+ * @addr: Address to count from
+ *
+ * This operation is atomic and cannot be reordered.
+ * It also implies a memory barrier.
+ */
+static inline int test_and_clear_bit(unsigned nr, volatile unsigned long *addr)
+{
+	unsigned long mask = BIT_MASK(nr);
+	addr += BIT_WORD(nr);
+	smp_mb();  /* barrier for proper semantics */
+	return (_atomic_andn(addr, mask) & mask) != 0;
+}
+
+/**
+ * test_and_change_bit - Change a bit and return its old value
+ * @nr: Bit to change
+ * @addr: Address to count from
+ *
+ * This operation is atomic and cannot be reordered.
+ * It also implies a memory barrier.
+ */
+static inline int test_and_change_bit(unsigned nr,
+				      volatile unsigned long *addr)
+{
+	unsigned long mask = BIT_MASK(nr);
+	addr += BIT_WORD(nr);
+	smp_mb();  /* barrier for proper semantics */
+	return (_atomic_xor(addr, mask) & mask) != 0;
+}
+
+/* See discussion at smp_mb__before_atomic_dec() in <asm/atomic.h>. */
+#define smp_mb__before_clear_bit()	smp_mb()
+#define smp_mb__after_clear_bit()	do {} while (0)
+
+#include <asm-generic/bitops/non-atomic.h>
+#include <asm-generic/bitops/ext2-atomic.h>
+
+#endif /* _ASM_TILE_BITOPS_32_H */
diff --git a/arch/tile/include/asm/bitsperlong.h b/arch/tile/include/asm/bitsperlong.h
new file mode 100644
index 0000000..58c771f
--- /dev/null
+++ b/arch/tile/include/asm/bitsperlong.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_BITSPERLONG_H
+#define _ASM_TILE_BITSPERLONG_H
+
+#ifdef __LP64__
+# define __BITS_PER_LONG 64
+#else
+# define __BITS_PER_LONG 32
+#endif
+
+#include <asm-generic/bitsperlong.h>
+
+#endif /* _ASM_TILE_BITSPERLONG_H */
diff --git a/arch/tile/include/asm/bug.h b/arch/tile/include/asm/bug.h
new file mode 100644
index 0000000..b12fd89
--- /dev/null
+++ b/arch/tile/include/asm/bug.h
@@ -0,0 +1 @@
+#include <asm-generic/bug.h>
diff --git a/arch/tile/include/asm/bugs.h b/arch/tile/include/asm/bugs.h
new file mode 100644
index 0000000..61791e1
--- /dev/null
+++ b/arch/tile/include/asm/bugs.h
@@ -0,0 +1 @@
+#include <asm-generic/bugs.h>
diff --git a/arch/tile/include/asm/byteorder.h b/arch/tile/include/asm/byteorder.h
new file mode 100644
index 0000000..9558416
--- /dev/null
+++ b/arch/tile/include/asm/byteorder.h
@@ -0,0 +1 @@
+#include <linux/byteorder/little_endian.h>
diff --git a/arch/tile/include/asm/cache.h b/arch/tile/include/asm/cache.h
new file mode 100644
index 0000000..c2b7dcf
--- /dev/null
+++ b/arch/tile/include/asm/cache.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_CACHE_H
+#define _ASM_TILE_CACHE_H
+
+#include <arch/chip.h>
+
+/* bytes per L1 data cache line */
+#define L1_CACHE_SHIFT		CHIP_L1D_LOG_LINE_SIZE()
+#define L1_CACHE_BYTES		(1 << L1_CACHE_SHIFT)
+#define L1_CACHE_ALIGN(x)	(((x)+(L1_CACHE_BYTES-1)) & -L1_CACHE_BYTES)
+
+/* bytes per L1 instruction cache line */
+#define L1I_CACHE_SHIFT		CHIP_L1I_LOG_LINE_SIZE()
+#define L1I_CACHE_BYTES		(1 << L1I_CACHE_SHIFT)
+#define L1I_CACHE_ALIGN(x)	(((x)+(L1I_CACHE_BYTES-1)) & -L1I_CACHE_BYTES)
+
+/* bytes per L2 cache line */
+#define L2_CACHE_SHIFT		CHIP_L2_LOG_LINE_SIZE()
+#define L2_CACHE_BYTES		(1 << L2_CACHE_SHIFT)
+#define L2_CACHE_ALIGN(x)	(((x)+(L2_CACHE_BYTES-1)) & -L2_CACHE_BYTES)
+
+/* use the cache line size for the L2, which is where it counts */
+#define SMP_CACHE_BYTES_SHIFT	L2_CACHE_SHIFT
+#define SMP_CACHE_BYTES		L2_CACHE_BYTES
+#define INTERNODE_CACHE_SHIFT   L2_CACHE_SHIFT
+#define INTERNODE_CACHE_BYTES   L2_CACHE_BYTES
+
+/* Group together read-mostly things to avoid cache false sharing */
+#define __read_mostly __attribute__((__section__(".data.read_mostly")))
+
+/*
+ * Attribute for data that is kept read/write coherent until the end of
+ * initialization, then bumped to read/only incoherent for performance.
+ */
+#define __write_once __attribute__((__section__(".w1data")))
+
+#endif /* _ASM_TILE_CACHE_H */
diff --git a/arch/tile/include/asm/cacheflush.h b/arch/tile/include/asm/cacheflush.h
new file mode 100644
index 0000000..7e2096a
--- /dev/null
+++ b/arch/tile/include/asm/cacheflush.h
@@ -0,0 +1,145 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_CACHEFLUSH_H
+#define _ASM_TILE_CACHEFLUSH_H
+
+#include <arch/chip.h>
+
+/* Keep includes the same across arches.  */
+#include <linux/mm.h>
+#include <linux/cache.h>
+#include <asm/system.h>
+
+/* Caches are physically-indexed and so don't need special treatment */
+#define flush_cache_all()			do { } while (0)
+#define flush_cache_mm(mm)			do { } while (0)
+#define flush_cache_dup_mm(mm)			do { } while (0)
+#define flush_cache_range(vma, start, end)	do { } while (0)
+#define flush_cache_page(vma, vmaddr, pfn)	do { } while (0)
+#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0
+#define flush_dcache_page(page)			do { } while (0)
+#define flush_dcache_mmap_lock(mapping)		do { } while (0)
+#define flush_dcache_mmap_unlock(mapping)	do { } while (0)
+#define flush_cache_vmap(start, end)		do { } while (0)
+#define flush_cache_vunmap(start, end)		do { } while (0)
+#define flush_icache_page(vma, pg)		do { } while (0)
+#define flush_icache_user_range(vma, pg, adr, len)	do { } while (0)
+
+/* See "arch/tile/lib/__invalidate_icache.S". */
+extern void __invalidate_icache(unsigned long start, unsigned long size);
+
+/* Flush the icache just on this cpu */
+static inline void __flush_icache_range(unsigned long start, unsigned long end)
+{
+	__invalidate_icache(start, end - start);
+}
+
+/* Flush the entire icache on this cpu. */
+#define __flush_icache() __flush_icache_range(0, CHIP_L1I_CACHE_SIZE())
+
+#ifdef CONFIG_SMP
+/*
+ * When the kernel writes to its own text we need to do an SMP
+ * broadcast to make the L1I coherent everywhere.  This includes
+ * module load and single step.
+ */
+extern void flush_icache_range(unsigned long start, unsigned long end);
+#else
+#define flush_icache_range __flush_icache_range
+#endif
+
+/*
+ * An update to an executable user page requires icache flushing.
+ * We could carefully update only tiles that are running this process,
+ * and rely on the fact that we flush the icache on every context
+ * switch to avoid doing extra work here.  But for now, I'll be
+ * conservative and just do a global icache flush.
+ */
+static inline void copy_to_user_page(struct vm_area_struct *vma,
+				     struct page *page, unsigned long vaddr,
+				     void *dst, void *src, int len)
+{
+	memcpy(dst, src, len);
+	if (vma->vm_flags & VM_EXEC) {
+		flush_icache_range((unsigned long) dst,
+				   (unsigned long) dst + len);
+	}
+}
+
+#define copy_from_user_page(vma, page, vaddr, dst, src, len) \
+	memcpy((dst), (src), (len))
+
+/*
+ * Invalidate a VA range; pads to L2 cacheline boundaries.
+ *
+ * Note that on TILE64, __inv_buffer() actually flushes modified
+ * cache lines in addition to invalidating them, i.e., it's the
+ * same as __finv_buffer().
+ */
+static inline void __inv_buffer(void *buffer, size_t size)
+{
+	char *next = (char *)((long)buffer & -L2_CACHE_BYTES);
+	char *finish = (char *)L2_CACHE_ALIGN((long)buffer + size);
+	while (next < finish) {
+		__insn_inv(next);
+		next += CHIP_INV_STRIDE();
+	}
+}
+
+/* Flush a VA range; pads to L2 cacheline boundaries. */
+static inline void __flush_buffer(void *buffer, size_t size)
+{
+	char *next = (char *)((long)buffer & -L2_CACHE_BYTES);
+	char *finish = (char *)L2_CACHE_ALIGN((long)buffer + size);
+	while (next < finish) {
+		__insn_flush(next);
+		next += CHIP_FLUSH_STRIDE();
+	}
+}
+
+/* Flush & invalidate a VA range; pads to L2 cacheline boundaries. */
+static inline void __finv_buffer(void *buffer, size_t size)
+{
+	char *next = (char *)((long)buffer & -L2_CACHE_BYTES);
+	char *finish = (char *)L2_CACHE_ALIGN((long)buffer + size);
+	while (next < finish) {
+		__insn_finv(next);
+		next += CHIP_FINV_STRIDE();
+	}
+}
+
+
+/* Invalidate a VA range, then memory fence. */
+static inline void inv_buffer(void *buffer, size_t size)
+{
+	__inv_buffer(buffer, size);
+	mb_incoherent();
+}
+
+/* Flush a VA range, then memory fence. */
+static inline void flush_buffer(void *buffer, size_t size)
+{
+	__flush_buffer(buffer, size);
+	mb_incoherent();
+}
+
+/* Flush & invalidate a VA range, then memory fence. */
+static inline void finv_buffer(void *buffer, size_t size)
+{
+	__finv_buffer(buffer, size);
+	mb_incoherent();
+}
+
+#endif /* _ASM_TILE_CACHEFLUSH_H */
diff --git a/arch/tile/include/asm/checksum.h b/arch/tile/include/asm/checksum.h
new file mode 100644
index 0000000..a120766
--- /dev/null
+++ b/arch/tile/include/asm/checksum.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_CHECKSUM_H
+#define _ASM_TILE_CHECKSUM_H
+
+#include <asm-generic/checksum.h>
+
+/* Allow us to provide a more optimized do_csum(). */
+__wsum do_csum(const unsigned char *buff, int len);
+#define do_csum do_csum
+
+#endif /* _ASM_TILE_CHECKSUM_H */
diff --git a/arch/tile/include/asm/compat.h b/arch/tile/include/asm/compat.h
new file mode 100644
index 0000000..e133c53
--- /dev/null
+++ b/arch/tile/include/asm/compat.h
@@ -0,0 +1,308 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_COMPAT_H
+#define _ASM_TILE_COMPAT_H
+
+/*
+ * Architecture specific compatibility types
+ */
+#include <linux/types.h>
+#include <linux/sched.h>
+
+#define COMPAT_USER_HZ	100
+
+/* "long" and pointer-based types are different. */
+typedef s32		compat_long_t;
+typedef u32		compat_ulong_t;
+typedef u32		compat_size_t;
+typedef s32		compat_ssize_t;
+typedef s32		compat_off_t;
+typedef s32		compat_time_t;
+typedef s32		compat_clock_t;
+typedef u32		compat_ino_t;
+typedef u32		compat_caddr_t;
+typedef	u32		compat_uptr_t;
+
+/* Many types are "int" or otherwise the same. */
+typedef __kernel_pid_t compat_pid_t;
+typedef __kernel_uid_t __compat_uid_t;
+typedef __kernel_gid_t __compat_gid_t;
+typedef __kernel_uid32_t __compat_uid32_t;
+typedef __kernel_uid32_t __compat_gid32_t;
+typedef __kernel_mode_t compat_mode_t;
+typedef __kernel_dev_t compat_dev_t;
+typedef __kernel_loff_t compat_loff_t;
+typedef __kernel_nlink_t compat_nlink_t;
+typedef __kernel_ipc_pid_t compat_ipc_pid_t;
+typedef __kernel_daddr_t compat_daddr_t;
+typedef __kernel_fsid_t	compat_fsid_t;
+typedef __kernel_timer_t compat_timer_t;
+typedef __kernel_key_t compat_key_t;
+typedef int compat_int_t;
+typedef s64 compat_s64;
+typedef uint compat_uint_t;
+typedef u64 compat_u64;
+
+/* We use the same register dump format in 32-bit images. */
+typedef unsigned long compat_elf_greg_t;
+#define COMPAT_ELF_NGREG (sizeof(struct pt_regs) / sizeof(compat_elf_greg_t))
+typedef compat_elf_greg_t compat_elf_gregset_t[COMPAT_ELF_NGREG];
+
+struct compat_timespec {
+	compat_time_t	tv_sec;
+	s32		tv_nsec;
+};
+
+struct compat_timeval {
+	compat_time_t	tv_sec;
+	s32		tv_usec;
+};
+
+struct compat_stat {
+	unsigned int	st_dev;
+	unsigned int	st_ino;
+	unsigned int	st_mode;
+	unsigned int	st_nlink;
+	unsigned int	st_uid;
+	unsigned int	st_gid;
+	unsigned int	st_rdev;
+	unsigned int    __pad1;
+	int		st_size;
+	int		st_blksize;
+	int		__pad2;
+	int		st_blocks;
+	int		st_atime;
+	unsigned int	st_atime_nsec;
+	int		st_mtime;
+	unsigned int	st_mtime_nsec;
+	int		st_ctime;
+	unsigned int	st_ctime_nsec;
+	unsigned int	__unused[2];
+};
+
+struct compat_stat64 {
+	unsigned long	st_dev;
+	unsigned long	st_ino;
+	unsigned int	st_mode;
+	unsigned int	st_nlink;
+	unsigned int	st_uid;
+	unsigned int	st_gid;
+	unsigned long	st_rdev;
+	long		st_size;
+	unsigned int	st_blksize;
+	unsigned long	st_blocks __attribute__((packed));
+	unsigned int	st_atime;
+	unsigned int	st_atime_nsec;
+	unsigned int	st_mtime;
+	unsigned int	st_mtime_nsec;
+	unsigned int	st_ctime;
+	unsigned int	st_ctime_nsec;
+	unsigned int	__unused8;
+};
+
+#define compat_statfs statfs
+
+struct compat_sysctl {
+	unsigned int	name;
+	int		nlen;
+	unsigned int	oldval;
+	unsigned int	oldlenp;
+	unsigned int	newval;
+	unsigned int	newlen;
+	unsigned int	__unused[4];
+};
+
+
+struct compat_flock {
+	short		l_type;
+	short		l_whence;
+	compat_off_t	l_start;
+	compat_off_t	l_len;
+	compat_pid_t	l_pid;
+};
+
+#define F_GETLK64	12	/*  using 'struct flock64' */
+#define F_SETLK64	13
+#define F_SETLKW64	14
+
+struct compat_flock64 {
+	short		l_type;
+	short		l_whence;
+	compat_loff_t	l_start;
+	compat_loff_t	l_len;
+	compat_pid_t	l_pid;
+};
+
+#define COMPAT_RLIM_INFINITY		0xffffffff
+
+#define _COMPAT_NSIG		64
+#define _COMPAT_NSIG_BPW	32
+
+typedef u32               compat_sigset_word;
+
+#define COMPAT_OFF_T_MAX	0x7fffffff
+#define COMPAT_LOFF_T_MAX	0x7fffffffffffffffL
+
+struct compat_ipc64_perm {
+	compat_key_t key;
+	__compat_uid32_t uid;
+	__compat_gid32_t gid;
+	__compat_uid32_t cuid;
+	__compat_gid32_t cgid;
+	unsigned short mode;
+	unsigned short __pad1;
+	unsigned short seq;
+	unsigned short __pad2;
+	compat_ulong_t unused1;
+	compat_ulong_t unused2;
+};
+
+struct compat_semid64_ds {
+	struct compat_ipc64_perm sem_perm;
+	compat_time_t  sem_otime;
+	compat_ulong_t __unused1;
+	compat_time_t  sem_ctime;
+	compat_ulong_t __unused2;
+	compat_ulong_t sem_nsems;
+	compat_ulong_t __unused3;
+	compat_ulong_t __unused4;
+};
+
+struct compat_msqid64_ds {
+	struct compat_ipc64_perm msg_perm;
+	compat_time_t  msg_stime;
+	compat_ulong_t __unused1;
+	compat_time_t  msg_rtime;
+	compat_ulong_t __unused2;
+	compat_time_t  msg_ctime;
+	compat_ulong_t __unused3;
+	compat_ulong_t msg_cbytes;
+	compat_ulong_t msg_qnum;
+	compat_ulong_t msg_qbytes;
+	compat_pid_t   msg_lspid;
+	compat_pid_t   msg_lrpid;
+	compat_ulong_t __unused4;
+	compat_ulong_t __unused5;
+};
+
+struct compat_shmid64_ds {
+	struct compat_ipc64_perm shm_perm;
+	compat_size_t  shm_segsz;
+	compat_time_t  shm_atime;
+	compat_ulong_t __unused1;
+	compat_time_t  shm_dtime;
+	compat_ulong_t __unused2;
+	compat_time_t  shm_ctime;
+	compat_ulong_t __unused3;
+	compat_pid_t   shm_cpid;
+	compat_pid_t   shm_lpid;
+	compat_ulong_t shm_nattch;
+	compat_ulong_t __unused4;
+	compat_ulong_t __unused5;
+};
+
+/*
+ * A pointer passed in from user mode. This should not
+ * be used for syscall parameters, just declare them
+ * as pointers because the syscall entry code will have
+ * appropriately converted them already.
+ */
+
+static inline void __user *compat_ptr(compat_uptr_t uptr)
+{
+	return (void __user *)(unsigned long)uptr;
+}
+
+static inline compat_uptr_t ptr_to_compat(void __user *uptr)
+{
+	return (u32)(unsigned long)uptr;
+}
+
+/* Sign-extend when storing a kernel pointer to a user's ptregs. */
+static inline unsigned long ptr_to_compat_reg(void __user *uptr)
+{
+	return (long)(int)(long)uptr;
+}
+
+static inline void __user *compat_alloc_user_space(long len)
+{
+	struct pt_regs *regs = task_pt_regs(current);
+	return (void __user *)regs->sp - len;
+}
+
+static inline int is_compat_task(void)
+{
+	return current_thread_info()->status & TS_COMPAT;
+}
+
+extern int compat_setup_rt_frame(int sig, struct k_sigaction *ka,
+				 siginfo_t *info, sigset_t *set,
+				 struct pt_regs *regs);
+
+/* Compat syscalls. */
+struct compat_sigaction;
+struct compat_siginfo;
+struct compat_sigaltstack;
+long compat_sys_execve(char __user *path, compat_uptr_t __user *argv,
+		       compat_uptr_t __user *envp);
+long compat_sys_rt_sigaction(int sig, struct compat_sigaction __user *act,
+			     struct compat_sigaction __user *oact,
+			     size_t sigsetsize);
+long compat_sys_rt_sigqueueinfo(int pid, int sig,
+				struct compat_siginfo __user *uinfo);
+long compat_sys_rt_sigreturn(void);
+long compat_sys_sigaltstack(const struct compat_sigaltstack __user *uss_ptr,
+			    struct compat_sigaltstack __user *uoss_ptr);
+long compat_sys_truncate64(char __user *filename, u32 dummy, u32 low, u32 high);
+long compat_sys_ftruncate64(unsigned int fd, u32 dummy, u32 low, u32 high);
+long compat_sys_pread64(unsigned int fd, char __user *ubuf, size_t count,
+			u32 dummy, u32 low, u32 high);
+long compat_sys_pwrite64(unsigned int fd, char __user *ubuf, size_t count,
+			 u32 dummy, u32 low, u32 high);
+long compat_sys_lookup_dcookie(u32 low, u32 high, char __user *buf, size_t len);
+long compat_sys_sync_file_range2(int fd, unsigned int flags,
+				 u32 offset_lo, u32 offset_hi,
+				 u32 nbytes_lo, u32 nbytes_hi);
+long compat_sys_fallocate(int fd, int mode,
+			  u32 offset_lo, u32 offset_hi,
+			  u32 len_lo, u32 len_hi);
+long compat_sys_stat64(char __user *filename,
+		       struct compat_stat64 __user *statbuf);
+long compat_sys_lstat64(char __user *filename,
+			struct compat_stat64 __user *statbuf);
+long compat_sys_fstat64(unsigned int fd, struct compat_stat64 __user *statbuf);
+long compat_sys_fstatat64(int dfd, char __user *filename,
+			  struct compat_stat64 __user *statbuf, int flag);
+long compat_sys_sched_rr_get_interval(compat_pid_t pid,
+				      struct compat_timespec __user *interval);
+ssize_t compat_sys_sendfile(int out_fd, int in_fd, compat_off_t __user *offset,
+			    size_t count);
+
+/* Versions of compat functions that differ from generic Linux. */
+struct compat_msgbuf;
+long tile_compat_sys_msgsnd(int msqid,
+			    struct compat_msgbuf __user *msgp,
+			    size_t msgsz, int msgflg);
+long tile_compat_sys_msgrcv(int msqid,
+			    struct compat_msgbuf __user *msgp,
+			    size_t msgsz, long msgtyp, int msgflg);
+long tile_compat_sys_ptrace(compat_long_t request, compat_long_t pid,
+			    compat_long_t addr, compat_long_t data);
+
+/* Tilera Linux syscalls that don't have "compat" versions. */
+#define compat_sys_raise_fpe sys_raise_fpe
+#define compat_sys_flush_cache sys_flush_cache
+
+#endif /* _ASM_TILE_COMPAT_H */
diff --git a/arch/tile/include/asm/cputime.h b/arch/tile/include/asm/cputime.h
new file mode 100644
index 0000000..6d68ad7
--- /dev/null
+++ b/arch/tile/include/asm/cputime.h
@@ -0,0 +1 @@
+#include <asm-generic/cputime.h>
diff --git a/arch/tile/include/asm/current.h b/arch/tile/include/asm/current.h
new file mode 100644
index 0000000..da21acf
--- /dev/null
+++ b/arch/tile/include/asm/current.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_CURRENT_H
+#define _ASM_TILE_CURRENT_H
+
+#include <linux/thread_info.h>
+
+struct task_struct;
+
+static inline struct task_struct *get_current(void)
+{
+	return current_thread_info()->task;
+}
+#define current get_current()
+
+/* Return a usable "task_struct" pointer even if the real one is corrupt. */
+struct task_struct *validate_current(void);
+
+#endif /* _ASM_TILE_CURRENT_H */
diff --git a/arch/tile/include/asm/delay.h b/arch/tile/include/asm/delay.h
new file mode 100644
index 0000000..97b0e69
--- /dev/null
+++ b/arch/tile/include/asm/delay.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_DELAY_H
+#define _ASM_TILE_DELAY_H
+
+/* Undefined functions to get compile-time errors. */
+extern void __bad_udelay(void);
+extern void __bad_ndelay(void);
+
+extern void __udelay(unsigned long usecs);
+extern void __ndelay(unsigned long nsecs);
+extern void __delay(unsigned long loops);
+
+#define udelay(n) (__builtin_constant_p(n) ? \
+	((n) > 20000 ? __bad_udelay() : __ndelay((n) * 1000)) : \
+	__udelay(n))
+
+#define ndelay(n) (__builtin_constant_p(n) ? \
+	((n) > 20000 ? __bad_ndelay() : __ndelay(n)) : \
+	__ndelay(n))
+
+#endif /* _ASM_TILE_DELAY_H */
diff --git a/arch/tile/include/asm/device.h b/arch/tile/include/asm/device.h
new file mode 100644
index 0000000..f0a4c25
--- /dev/null
+++ b/arch/tile/include/asm/device.h
@@ -0,0 +1 @@
+#include <asm-generic/device.h>
diff --git a/arch/tile/include/asm/div64.h b/arch/tile/include/asm/div64.h
new file mode 100644
index 0000000..6cd978c
--- /dev/null
+++ b/arch/tile/include/asm/div64.h
@@ -0,0 +1 @@
+#include <asm-generic/div64.h>
diff --git a/arch/tile/include/asm/dma-mapping.h b/arch/tile/include/asm/dma-mapping.h
new file mode 100644
index 0000000..cf466b3
--- /dev/null
+++ b/arch/tile/include/asm/dma-mapping.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_DMA_MAPPING_H
+#define _ASM_TILE_DMA_MAPPING_H
+
+#include <linux/mm.h>
+#include <linux/scatterlist.h>
+#include <linux/cache.h>
+#include <linux/io.h>
+
+/*
+ * Note that on x86 and powerpc, there is a "struct dma_mapping_ops"
+ * that is used for all the DMA operations.  For now, we don't have an
+ * equivalent on tile, because we only have a single way of doing DMA.
+ * (Tilera bug 7994 to use dma_mapping_ops.)
+ */
+
+#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
+#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
+
+extern dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size,
+			  enum dma_data_direction);
+extern void dma_unmap_single(struct device *dev, dma_addr_t dma_addr,
+			     size_t size, enum dma_data_direction);
+extern int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
+	       enum dma_data_direction);
+extern void dma_unmap_sg(struct device *dev, struct scatterlist *sg,
+			 int nhwentries, enum dma_data_direction);
+extern dma_addr_t dma_map_page(struct device *dev, struct page *page,
+			       unsigned long offset, size_t size,
+			       enum dma_data_direction);
+extern void dma_unmap_page(struct device *dev, dma_addr_t dma_address,
+			   size_t size, enum dma_data_direction);
+extern void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
+				int nelems, enum dma_data_direction);
+extern void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
+				   int nelems, enum dma_data_direction);
+
+
+void *dma_alloc_coherent(struct device *dev, size_t size,
+			   dma_addr_t *dma_handle, gfp_t flag);
+
+void dma_free_coherent(struct device *dev, size_t size,
+			 void *vaddr, dma_addr_t dma_handle);
+
+extern void dma_sync_single_for_cpu(struct device *, dma_addr_t, size_t,
+				    enum dma_data_direction);
+extern void dma_sync_single_for_device(struct device *, dma_addr_t,
+				       size_t, enum dma_data_direction);
+extern void dma_sync_single_range_for_cpu(struct device *, dma_addr_t,
+					  unsigned long offset, size_t,
+					  enum dma_data_direction);
+extern void dma_sync_single_range_for_device(struct device *, dma_addr_t,
+					     unsigned long offset, size_t,
+					     enum dma_data_direction);
+extern void dma_cache_sync(void *vaddr, size_t, enum dma_data_direction);
+
+static inline int
+dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
+{
+	return 0;
+}
+
+static inline int
+dma_supported(struct device *dev, u64 mask)
+{
+	return 1;
+}
+
+static inline int
+dma_set_mask(struct device *dev, u64 mask)
+{
+	if (!dev->dma_mask || !dma_supported(dev, mask))
+		return -EIO;
+
+	*dev->dma_mask = mask;
+
+	return 0;
+}
+
+static inline int
+dma_get_cache_alignment(void)
+{
+	return L2_CACHE_BYTES;
+}
+
+#define dma_is_consistent(d, h)	(1)
+
+
+#endif /* _ASM_TILE_DMA_MAPPING_H */
diff --git a/arch/tile/include/asm/dma.h b/arch/tile/include/asm/dma.h
new file mode 100644
index 0000000..12a7ca1
--- /dev/null
+++ b/arch/tile/include/asm/dma.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_DMA_H
+#define _ASM_TILE_DMA_H
+
+#include <asm-generic/dma.h>
+
+/* Needed by drivers/pci/quirks.c */
+#ifdef CONFIG_PCI
+extern int isa_dma_bridge_buggy;
+#endif
+
+#endif /* _ASM_TILE_DMA_H */
diff --git a/arch/tile/include/asm/elf.h b/arch/tile/include/asm/elf.h
new file mode 100644
index 0000000..1bca0de
--- /dev/null
+++ b/arch/tile/include/asm/elf.h
@@ -0,0 +1,169 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_ELF_H
+#define _ASM_TILE_ELF_H
+
+/*
+ * ELF register definitions.
+ */
+
+#include <arch/chip.h>
+
+#include <linux/ptrace.h>
+#include <asm/byteorder.h>
+#include <asm/page.h>
+
+typedef unsigned long elf_greg_t;
+
+#define ELF_NGREG (sizeof(struct pt_regs) / sizeof(elf_greg_t))
+typedef elf_greg_t elf_gregset_t[ELF_NGREG];
+
+#define EM_TILE64  187
+#define EM_TILEPRO 188
+#define EM_TILEGX  191
+
+/* Provide a nominal data structure. */
+#define ELF_NFPREG	0
+typedef double elf_fpreg_t;
+typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG];
+
+#ifdef __tilegx__
+#define ELF_CLASS	ELFCLASS64
+#else
+#define ELF_CLASS	ELFCLASS32
+#endif
+#define ELF_DATA	ELFDATA2LSB
+
+/*
+ * There seems to be a bug in how compat_binfmt_elf.c works: it
+ * #undefs ELF_ARCH, but it is then used in binfmt_elf.c for fill_note_info().
+ * Hack around this by providing an enum value of ELF_ARCH.
+ */
+enum { ELF_ARCH = CHIP_ELF_TYPE() };
+#define ELF_ARCH ELF_ARCH
+
+/*
+ * This is used to ensure we don't load something for the wrong architecture.
+ */
+#define elf_check_arch(x)  \
+	((x)->e_ident[EI_CLASS] == ELF_CLASS && \
+	 ((x)->e_machine == CHIP_ELF_TYPE() || \
+	  (x)->e_machine == CHIP_COMPAT_ELF_TYPE()))
+
+/* The module loader only handles a few relocation types. */
+#ifndef __tilegx__
+#define R_TILE_32                 1
+#define R_TILE_JOFFLONG_X1       15
+#define R_TILE_IMM16_X0_LO       25
+#define R_TILE_IMM16_X1_LO       26
+#define R_TILE_IMM16_X0_HA       29
+#define R_TILE_IMM16_X1_HA       30
+#else
+#define R_TILEGX_64                       1
+#define R_TILEGX_JUMPOFF_X1              21
+#define R_TILEGX_IMM16_X0_HW0            36
+#define R_TILEGX_IMM16_X1_HW0            37
+#define R_TILEGX_IMM16_X0_HW1            38
+#define R_TILEGX_IMM16_X1_HW1            39
+#define R_TILEGX_IMM16_X0_HW2_LAST       48
+#define R_TILEGX_IMM16_X1_HW2_LAST       49
+#endif
+
+/* Use standard page size for core dumps. */
+#define ELF_EXEC_PAGESIZE	PAGE_SIZE
+
+/*
+ * This is the location that an ET_DYN program is loaded if exec'ed.  Typical
+ * use of this is to invoke "./ld.so someprog" to test out a new version of
+ * the loader.  We need to make sure that it is out of the way of the program
+ * that it will "exec", and that there is sufficient room for the brk.
+ */
+#define ELF_ET_DYN_BASE         (TASK_SIZE / 3 * 2)
+
+#define ELF_CORE_COPY_REGS(_dest, _regs)			\
+	memcpy((char *) &_dest, (char *) _regs,			\
+	       sizeof(struct pt_regs));
+
+/* No additional FP registers to copy. */
+#define ELF_CORE_COPY_FPREGS(t, fpu) 0
+
+/*
+ * This yields a mask that user programs can use to figure out what
+ * instruction set this CPU supports.  This could be done in user space,
+ * but it's not easy, and we've already done it here.
+ */
+#define ELF_HWCAP	(0)
+
+/*
+ * This yields a string that ld.so will use to load implementation
+ * specific libraries for optimization.  This is more specific in
+ * intent than poking at uname or /proc/cpuinfo.
+ */
+#define ELF_PLATFORM  (NULL)
+
+extern void elf_plat_init(struct pt_regs *regs, unsigned long load_addr);
+
+#define ELF_PLAT_INIT(_r, load_addr) elf_plat_init(_r, load_addr)
+
+extern int dump_task_regs(struct task_struct *, elf_gregset_t *);
+#define ELF_CORE_COPY_TASK_REGS(tsk, elf_regs) dump_task_regs(tsk, elf_regs)
+
+/* Tilera Linux has no personalities currently, so no need to do anything. */
+#define SET_PERSONALITY(ex) do { } while (0)
+
+#define ARCH_HAS_SETUP_ADDITIONAL_PAGES
+/* Support auto-mapping of the user interrupt vectors. */
+struct linux_binprm;
+extern int arch_setup_additional_pages(struct linux_binprm *bprm,
+				       int executable_stack);
+#ifdef CONFIG_COMPAT
+
+#define COMPAT_ELF_PLATFORM "tilegx-m32"
+
+/*
+ * "Compat" binaries have the same machine type, but 32-bit class,
+ * since they're not a separate machine type, but just a 32-bit
+ * variant of the standard 64-bit architecture.
+ */
+#define compat_elf_check_arch(x)  \
+	((x)->e_ident[EI_CLASS] == ELFCLASS32 && \
+	 ((x)->e_machine == CHIP_ELF_TYPE() || \
+	  (x)->e_machine == CHIP_COMPAT_ELF_TYPE()))
+
+#define compat_start_thread(regs, ip, usp) do { \
+		regs->pc = ptr_to_compat_reg((void *)(ip)); \
+		regs->sp = ptr_to_compat_reg((void *)(usp)); \
+	} while (0)
+
+/*
+ * Use SET_PERSONALITY to indicate compatibility via TS_COMPAT.
+ */
+#undef SET_PERSONALITY
+#define SET_PERSONALITY(ex) \
+do { \
+	current->personality = PER_LINUX; \
+	current_thread_info()->status &= ~TS_COMPAT; \
+} while (0)
+#define COMPAT_SET_PERSONALITY(ex) \
+do { \
+	current->personality = PER_LINUX_32BIT; \
+	current_thread_info()->status |= TS_COMPAT; \
+} while (0)
+
+#define COMPAT_ELF_ET_DYN_BASE (0xffffffff / 3 * 2)
+
+#endif /* CONFIG_COMPAT */
+
+#endif /* _ASM_TILE_ELF_H */
diff --git a/arch/tile/include/asm/emergency-restart.h b/arch/tile/include/asm/emergency-restart.h
new file mode 100644
index 0000000..3711bd9
--- /dev/null
+++ b/arch/tile/include/asm/emergency-restart.h
@@ -0,0 +1 @@
+#include <asm-generic/emergency-restart.h>
diff --git a/arch/tile/include/asm/errno.h b/arch/tile/include/asm/errno.h
new file mode 100644
index 0000000..4c82b50
--- /dev/null
+++ b/arch/tile/include/asm/errno.h
@@ -0,0 +1 @@
+#include <asm-generic/errno.h>
diff --git a/arch/tile/include/asm/fcntl.h b/arch/tile/include/asm/fcntl.h
new file mode 100644
index 0000000..46ab12d
--- /dev/null
+++ b/arch/tile/include/asm/fcntl.h
@@ -0,0 +1 @@
+#include <asm-generic/fcntl.h>
diff --git a/arch/tile/include/asm/fixmap.h b/arch/tile/include/asm/fixmap.h
new file mode 100644
index 0000000..51537ff
--- /dev/null
+++ b/arch/tile/include/asm/fixmap.h
@@ -0,0 +1,124 @@
+/*
+ * Copyright (C) 1998 Ingo Molnar
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_FIXMAP_H
+#define _ASM_TILE_FIXMAP_H
+
+#include <asm/page.h>
+
+#ifndef __ASSEMBLY__
+#include <linux/kernel.h>
+#ifdef CONFIG_HIGHMEM
+#include <linux/threads.h>
+#include <asm/kmap_types.h>
+#endif
+
+#define __fix_to_virt(x)	(FIXADDR_TOP - ((x) << PAGE_SHIFT))
+#define __virt_to_fix(x)	((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT)
+
+/*
+ * Here we define all the compile-time 'special' virtual
+ * addresses. The point is to have a constant address at
+ * compile time, but to set the physical address only
+ * in the boot process. We allocate these special addresses
+ * from the end of supervisor virtual memory backwards.
+ * Also this lets us do fail-safe vmalloc(), we
+ * can guarantee that these special addresses and
+ * vmalloc()-ed addresses never overlap.
+ *
+ * these 'compile-time allocated' memory buffers are
+ * fixed-size 4k pages. (or larger if used with an increment
+ * higher than 1) use fixmap_set(idx,phys) to associate
+ * physical memory with fixmap indices.
+ *
+ * TLB entries of such buffers will not be flushed across
+ * task switches.
+ *
+ * We don't bother with a FIX_HOLE since above the fixmaps
+ * is unmapped memory in any case.
+ */
+enum fixed_addresses {
+#ifdef CONFIG_HIGHMEM
+	FIX_KMAP_BEGIN,	/* reserved pte's for temporary kernel mappings */
+	FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
+#endif
+	__end_of_permanent_fixed_addresses,
+
+	/*
+	 * Temporary boot-time mappings, used before ioremap() is functional.
+	 * Not currently needed by the Tile architecture.
+	 */
+#define NR_FIX_BTMAPS	0
+#if NR_FIX_BTMAPS
+	FIX_BTMAP_END = __end_of_permanent_fixed_addresses,
+	FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS - 1,
+	__end_of_fixed_addresses
+#else
+	__end_of_fixed_addresses = __end_of_permanent_fixed_addresses
+#endif
+};
+
+extern void __set_fixmap(enum fixed_addresses idx,
+			 unsigned long phys, pgprot_t flags);
+
+#define set_fixmap(idx, phys) \
+		__set_fixmap(idx, phys, PAGE_KERNEL)
+/*
+ * Some hardware wants to get fixmapped without caching.
+ */
+#define set_fixmap_nocache(idx, phys) \
+		__set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE)
+
+#define clear_fixmap(idx) \
+		__set_fixmap(idx, 0, __pgprot(0))
+
+#define __FIXADDR_SIZE	(__end_of_permanent_fixed_addresses << PAGE_SHIFT)
+#define __FIXADDR_BOOT_SIZE	(__end_of_fixed_addresses << PAGE_SHIFT)
+#define FIXADDR_START		(FIXADDR_TOP + PAGE_SIZE - __FIXADDR_SIZE)
+#define FIXADDR_BOOT_START	(FIXADDR_TOP + PAGE_SIZE - __FIXADDR_BOOT_SIZE)
+
+extern void __this_fixmap_does_not_exist(void);
+
+/*
+ * 'index to address' translation. If anyone tries to use the idx
+ * directly without tranlation, we catch the bug with a NULL-deference
+ * kernel oops. Illegal ranges of incoming indices are caught too.
+ */
+static __always_inline unsigned long fix_to_virt(const unsigned int idx)
+{
+	/*
+	 * this branch gets completely eliminated after inlining,
+	 * except when someone tries to use fixaddr indices in an
+	 * illegal way. (such as mixing up address types or using
+	 * out-of-range indices).
+	 *
+	 * If it doesn't get removed, the linker will complain
+	 * loudly with a reasonably clear error message..
+	 */
+	if (idx >= __end_of_fixed_addresses)
+		__this_fixmap_does_not_exist();
+
+	return __fix_to_virt(idx);
+}
+
+static inline unsigned long virt_to_fix(const unsigned long vaddr)
+{
+	BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START);
+	return __virt_to_fix(vaddr);
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _ASM_TILE_FIXMAP_H */
diff --git a/arch/tile/include/asm/ftrace.h b/arch/tile/include/asm/ftrace.h
new file mode 100644
index 0000000..461459b
--- /dev/null
+++ b/arch/tile/include/asm/ftrace.h
@@ -0,0 +1,20 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_FTRACE_H
+#define _ASM_TILE_FTRACE_H
+
+/* empty */
+
+#endif /* _ASM_TILE_FTRACE_H */
diff --git a/arch/tile/include/asm/futex.h b/arch/tile/include/asm/futex.h
new file mode 100644
index 0000000..9eaeb3c
--- /dev/null
+++ b/arch/tile/include/asm/futex.h
@@ -0,0 +1,136 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ * These routines make two important assumptions:
+ *
+ * 1. atomic_t is really an int and can be freely cast back and forth
+ *    (validated in __init_atomic_per_cpu).
+ *
+ * 2. userspace uses sys_cmpxchg() for all atomic operations, thus using
+ *    the same locking convention that all the kernel atomic routines use.
+ */
+
+#ifndef _ASM_TILE_FUTEX_H
+#define _ASM_TILE_FUTEX_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/futex.h>
+#include <linux/uaccess.h>
+#include <linux/errno.h>
+
+extern struct __get_user futex_set(int *v, int i);
+extern struct __get_user futex_add(int *v, int n);
+extern struct __get_user futex_or(int *v, int n);
+extern struct __get_user futex_andn(int *v, int n);
+extern struct __get_user futex_cmpxchg(int *v, int o, int n);
+
+#ifndef __tilegx__
+extern struct __get_user futex_xor(int *v, int n);
+#else
+static inline struct __get_user futex_xor(int __user *uaddr, int n)
+{
+	struct __get_user asm_ret = __get_user_4(uaddr);
+	if (!asm_ret.err) {
+		int oldval, newval;
+		do {
+			oldval = asm_ret.val;
+			newval = oldval ^ n;
+			asm_ret = futex_cmpxchg(uaddr, oldval, newval);
+		} while (asm_ret.err == 0 && oldval != asm_ret.val);
+	}
+	return asm_ret;
+}
+#endif
+
+static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
+{
+	int op = (encoded_op >> 28) & 7;
+	int cmp = (encoded_op >> 24) & 15;
+	int oparg = (encoded_op << 8) >> 20;
+	int cmparg = (encoded_op << 20) >> 20;
+	int ret;
+	struct __get_user asm_ret;
+
+	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
+		oparg = 1 << oparg;
+
+	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
+		return -EFAULT;
+
+	pagefault_disable();
+	switch (op) {
+	case FUTEX_OP_SET:
+		asm_ret = futex_set(uaddr, oparg);
+		break;
+	case FUTEX_OP_ADD:
+		asm_ret = futex_add(uaddr, oparg);
+		break;
+	case FUTEX_OP_OR:
+		asm_ret = futex_or(uaddr, oparg);
+		break;
+	case FUTEX_OP_ANDN:
+		asm_ret = futex_andn(uaddr, oparg);
+		break;
+	case FUTEX_OP_XOR:
+		asm_ret = futex_xor(uaddr, oparg);
+		break;
+	default:
+		asm_ret.err = -ENOSYS;
+	}
+	pagefault_enable();
+
+	ret = asm_ret.err;
+
+	if (!ret) {
+		switch (cmp) {
+		case FUTEX_OP_CMP_EQ:
+			ret = (asm_ret.val == cmparg);
+			break;
+		case FUTEX_OP_CMP_NE:
+			ret = (asm_ret.val != cmparg);
+			break;
+		case FUTEX_OP_CMP_LT:
+			ret = (asm_ret.val < cmparg);
+			break;
+		case FUTEX_OP_CMP_GE:
+			ret = (asm_ret.val >= cmparg);
+			break;
+		case FUTEX_OP_CMP_LE:
+			ret = (asm_ret.val <= cmparg);
+			break;
+		case FUTEX_OP_CMP_GT:
+			ret = (asm_ret.val > cmparg);
+			break;
+		default:
+			ret = -ENOSYS;
+		}
+	}
+	return ret;
+}
+
+static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval,
+						int newval)
+{
+	struct __get_user asm_ret;
+
+	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
+		return -EFAULT;
+
+	asm_ret = futex_cmpxchg(uaddr, oldval, newval);
+	return asm_ret.err ? asm_ret.err : asm_ret.val;
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _ASM_TILE_FUTEX_H */
diff --git a/arch/tile/include/asm/hardirq.h b/arch/tile/include/asm/hardirq.h
new file mode 100644
index 0000000..822390f
--- /dev/null
+++ b/arch/tile/include/asm/hardirq.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_HARDIRQ_H
+#define _ASM_TILE_HARDIRQ_H
+
+#include <linux/threads.h>
+#include <linux/cache.h>
+
+#include <asm/irq.h>
+
+typedef struct {
+	unsigned int __softirq_pending;
+	long idle_timestamp;
+
+	/* Hard interrupt statistics. */
+	unsigned int irq_timer_count;
+	unsigned int irq_syscall_count;
+	unsigned int irq_resched_count;
+	unsigned int irq_hv_flush_count;
+	unsigned int irq_call_count;
+	unsigned int irq_hv_msg_count;
+	unsigned int irq_dev_intr_count;
+
+} ____cacheline_aligned irq_cpustat_t;
+
+DECLARE_PER_CPU(irq_cpustat_t, irq_stat);
+
+#define __ARCH_IRQ_STAT
+#define __IRQ_STAT(cpu, member) (per_cpu(irq_stat, cpu).member)
+
+#include <linux/irq_cpustat.h>	/* Standard mappings for irq_cpustat_t above */
+
+#define HARDIRQ_BITS	8
+
+#endif /* _ASM_TILE_HARDIRQ_H */
diff --git a/arch/tile/include/asm/highmem.h b/arch/tile/include/asm/highmem.h
new file mode 100644
index 0000000..efdd12e
--- /dev/null
+++ b/arch/tile/include/asm/highmem.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (C) 1999 Gerhard Wichert, Siemens AG
+ *                   Gerhard.Wichert@pdb.siemens.de
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ * Used in CONFIG_HIGHMEM systems for memory pages which
+ * are not addressable by direct kernel virtual addresses.
+ *
+ */
+
+#ifndef _ASM_TILE_HIGHMEM_H
+#define _ASM_TILE_HIGHMEM_H
+
+#include <linux/interrupt.h>
+#include <linux/threads.h>
+#include <asm/kmap_types.h>
+#include <asm/tlbflush.h>
+#include <asm/homecache.h>
+
+/* declarations for highmem.c */
+extern unsigned long highstart_pfn, highend_pfn;
+
+extern pte_t *pkmap_page_table;
+
+/*
+ * Ordering is:
+ *
+ * FIXADDR_TOP
+ *			fixed_addresses
+ * FIXADDR_START
+ *			temp fixed addresses
+ * FIXADDR_BOOT_START
+ *			Persistent kmap area
+ * PKMAP_BASE
+ * VMALLOC_END
+ *			Vmalloc area
+ * VMALLOC_START
+ * high_memory
+ */
+#define LAST_PKMAP_MASK (LAST_PKMAP-1)
+#define PKMAP_NR(virt)  ((virt-PKMAP_BASE) >> PAGE_SHIFT)
+#define PKMAP_ADDR(nr)  (PKMAP_BASE + ((nr) << PAGE_SHIFT))
+
+void *kmap_high(struct page *page);
+void kunmap_high(struct page *page);
+void *kmap(struct page *page);
+void kunmap(struct page *page);
+void *kmap_fix_kpte(struct page *page, int finished);
+
+/* This macro is used only in map_new_virtual() to map "page". */
+#define kmap_prot page_to_kpgprot(page)
+
+void kunmap_atomic(void *kvaddr, enum km_type type);
+void *kmap_atomic_pfn(unsigned long pfn, enum km_type type);
+void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot);
+struct page *kmap_atomic_to_page(void *ptr);
+void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot);
+void *kmap_atomic(struct page *page, enum km_type type);
+void kmap_atomic_fix_kpte(struct page *page, int finished);
+
+#define flush_cache_kmaps()	do { } while (0)
+
+#endif /* _ASM_TILE_HIGHMEM_H */
diff --git a/arch/tile/include/asm/homecache.h b/arch/tile/include/asm/homecache.h
new file mode 100644
index 0000000..a824386
--- /dev/null
+++ b/arch/tile/include/asm/homecache.h
@@ -0,0 +1,125 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ * Handle issues around the Tile "home cache" model of coherence.
+ */
+
+#ifndef _ASM_TILE_HOMECACHE_H
+#define _ASM_TILE_HOMECACHE_H
+
+#include <asm/page.h>
+#include <linux/cpumask.h>
+
+struct page;
+struct task_struct;
+struct vm_area_struct;
+struct zone;
+
+/*
+ * Coherence point for the page is its memory controller.
+ * It is not present in any cache (L1 or L2).
+ */
+#define PAGE_HOME_UNCACHED -1
+
+/*
+ * Is this page immutable (unwritable) and thus able to be cached more
+ * widely than would otherwise be possible?  On tile64 this means we
+ * mark the PTE to cache locally; on tilepro it means we have "nc" set.
+ */
+#define PAGE_HOME_IMMUTABLE -2
+
+/*
+ * Each cpu considers its own cache to be the home for the page,
+ * which makes it incoherent.
+ */
+#define PAGE_HOME_INCOHERENT -3
+
+#if CHIP_HAS_CBOX_HOME_MAP()
+/* Home for the page is distributed via hash-for-home. */
+#define PAGE_HOME_HASH -4
+#endif
+
+/* Homing is unknown or unspecified.  Not valid for page_home(). */
+#define PAGE_HOME_UNKNOWN -5
+
+/* Home on the current cpu.  Not valid for page_home(). */
+#define PAGE_HOME_HERE -6
+
+/* Support wrapper to use instead of explicit hv_flush_remote(). */
+extern void flush_remote(unsigned long cache_pfn, unsigned long cache_length,
+			 const struct cpumask *cache_cpumask,
+			 HV_VirtAddr tlb_va, unsigned long tlb_length,
+			 unsigned long tlb_pgsize,
+			 const struct cpumask *tlb_cpumask,
+			 HV_Remote_ASID *asids, int asidcount);
+
+/* Set homing-related bits in a PTE (can also pass a pgprot_t). */
+extern pte_t pte_set_home(pte_t pte, int home);
+
+/* Do a cache eviction on the specified cpus. */
+extern void homecache_evict(const struct cpumask *mask);
+
+/*
+ * Change a kernel page's homecache.  It must not be mapped in user space.
+ * If !CONFIG_HOMECACHE, only usable on LOWMEM, and can only be called when
+ * no other cpu can reference the page, and causes a full-chip cache/TLB flush.
+ */
+extern void homecache_change_page_home(struct page *, int order, int home);
+
+/*
+ * Flush a page out of whatever cache(s) it is in.
+ * This is more than just finv, since it properly handles waiting
+ * for the data to reach memory on tilepro, but it can be quite
+ * heavyweight, particularly on hash-for-home memory.
+ */
+extern void homecache_flush_cache(struct page *, int order);
+
+/*
+ * Allocate a page with the given GFP flags, home, and optionally
+ * node.  These routines are actually just wrappers around the normal
+ * alloc_pages() / alloc_pages_node() functions, which set and clear
+ * a per-cpu variable to communicate with homecache_new_kernel_page().
+ * If !CONFIG_HOMECACHE, uses homecache_change_page_home().
+ */
+extern struct page *homecache_alloc_pages(gfp_t gfp_mask,
+					  unsigned int order, int home);
+extern struct page *homecache_alloc_pages_node(int nid, gfp_t gfp_mask,
+					       unsigned int order, int home);
+#define homecache_alloc_page(gfp_mask, home) \
+  homecache_alloc_pages(gfp_mask, 0, home)
+
+/*
+ * These routines are just pass-throughs to free_pages() when
+ * we support full homecaching.  If !CONFIG_HOMECACHE, then these
+ * routines use homecache_change_page_home() to reset the home
+ * back to the default before returning the page to the allocator.
+ */
+void homecache_free_pages(unsigned long addr, unsigned int order);
+#define homecache_free_page(page) \
+  homecache_free_pages((page), 0)
+
+
+
+/*
+ * Report the page home for LOWMEM pages by examining their kernel PTE,
+ * or for highmem pages as the default home.
+ */
+extern int page_home(struct page *);
+
+#define homecache_migrate_kthread() do {} while (0)
+
+#define homecache_kpte_lock() 0
+#define homecache_kpte_unlock(flags) do {} while (0)
+
+
+#endif /* _ASM_TILE_HOMECACHE_H */
diff --git a/arch/tile/include/asm/hugetlb.h b/arch/tile/include/asm/hugetlb.h
new file mode 100644
index 0000000..0521c27
--- /dev/null
+++ b/arch/tile/include/asm/hugetlb.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_HUGETLB_H
+#define _ASM_TILE_HUGETLB_H
+
+#include <asm/page.h>
+
+
+static inline int is_hugepage_only_range(struct mm_struct *mm,
+					 unsigned long addr,
+					 unsigned long len) {
+	return 0;
+}
+
+/*
+ * If the arch doesn't supply something else, assume that hugepage
+ * size aligned regions are ok without further preparation.
+ */
+static inline int prepare_hugepage_range(struct file *file,
+					 unsigned long addr, unsigned long len)
+{
+	struct hstate *h = hstate_file(file);
+	if (len & ~huge_page_mask(h))
+		return -EINVAL;
+	if (addr & ~huge_page_mask(h))
+		return -EINVAL;
+	return 0;
+}
+
+static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm)
+{
+}
+
+static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
+					  unsigned long addr, unsigned long end,
+					  unsigned long floor,
+					  unsigned long ceiling)
+{
+	free_pgd_range(tlb, addr, end, floor, ceiling);
+}
+
+static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
+				   pte_t *ptep, pte_t pte)
+{
+	set_pte_order(ptep, pte, HUGETLB_PAGE_ORDER);
+}
+
+static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+					    unsigned long addr, pte_t *ptep)
+{
+	return ptep_get_and_clear(mm, addr, ptep);
+}
+
+static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
+					 unsigned long addr, pte_t *ptep)
+{
+	ptep_clear_flush(vma, addr, ptep);
+}
+
+static inline int huge_pte_none(pte_t pte)
+{
+	return pte_none(pte);
+}
+
+static inline pte_t huge_pte_wrprotect(pte_t pte)
+{
+	return pte_wrprotect(pte);
+}
+
+static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
+					   unsigned long addr, pte_t *ptep)
+{
+	ptep_set_wrprotect(mm, addr, ptep);
+}
+
+static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
+					     unsigned long addr, pte_t *ptep,
+					     pte_t pte, int dirty)
+{
+	return ptep_set_access_flags(vma, addr, ptep, pte, dirty);
+}
+
+static inline pte_t huge_ptep_get(pte_t *ptep)
+{
+	return *ptep;
+}
+
+static inline int arch_prepare_hugepage(struct page *page)
+{
+	return 0;
+}
+
+static inline void arch_release_hugepage(struct page *page)
+{
+}
+
+#endif /* _ASM_TILE_HUGETLB_H */
diff --git a/arch/tile/include/asm/hv_driver.h b/arch/tile/include/asm/hv_driver.h
new file mode 100644
index 0000000..ad614de
--- /dev/null
+++ b/arch/tile/include/asm/hv_driver.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ * This header defines a wrapper interface for managing hypervisor
+ * device calls that will result in an interrupt at some later time.
+ * In particular, this provides wrappers for hv_preada() and
+ * hv_pwritea().
+ */
+
+#ifndef _ASM_TILE_HV_DRIVER_H
+#define _ASM_TILE_HV_DRIVER_H
+
+#include <hv/hypervisor.h>
+
+struct hv_driver_cb;
+
+/* A callback to be invoked when an operation completes. */
+typedef void hv_driver_callback_t(struct hv_driver_cb *cb, __hv32 result);
+
+/*
+ * A structure to hold information about an outstanding call.
+ * The driver must allocate a separate structure for each call.
+ */
+struct hv_driver_cb {
+	hv_driver_callback_t *callback;  /* Function to call on interrupt. */
+	void *dev;                       /* Driver-specific state variable. */
+};
+
+/* Wrapper for invoking hv_dev_preada(). */
+static inline int
+tile_hv_dev_preada(int devhdl, __hv32 flags, __hv32 sgl_len,
+		   HV_SGL sgl[/* sgl_len */], __hv64 offset,
+		   struct hv_driver_cb *callback)
+{
+	return hv_dev_preada(devhdl, flags, sgl_len, sgl,
+			     offset, (HV_IntArg)callback);
+}
+
+/* Wrapper for invoking hv_dev_pwritea(). */
+static inline int
+tile_hv_dev_pwritea(int devhdl, __hv32 flags, __hv32 sgl_len,
+		    HV_SGL sgl[/* sgl_len */], __hv64 offset,
+		    struct hv_driver_cb *callback)
+{
+	return hv_dev_pwritea(devhdl, flags, sgl_len, sgl,
+			      offset, (HV_IntArg)callback);
+}
+
+
+#endif /* _ASM_TILE_HV_DRIVER_H */
diff --git a/arch/tile/include/asm/hw_irq.h b/arch/tile/include/asm/hw_irq.h
new file mode 100644
index 0000000..4fac5fb
--- /dev/null
+++ b/arch/tile/include/asm/hw_irq.h
@@ -0,0 +1,18 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_HW_IRQ_H
+#define _ASM_TILE_HW_IRQ_H
+
+#endif /* _ASM_TILE_HW_IRQ_H */
diff --git a/arch/tile/include/asm/ide.h b/arch/tile/include/asm/ide.h
new file mode 100644
index 0000000..3c6f2ed
--- /dev/null
+++ b/arch/tile/include/asm/ide.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_IDE_H
+#define _ASM_TILE_IDE_H
+
+/* For IDE on PCI */
+#define MAX_HWIFS       10
+
+#define ide_default_io_ctl(base)	(0)
+
+#include <asm-generic/ide_iops.h>
+
+#endif /* _ASM_TILE_IDE_H */
diff --git a/arch/tile/include/asm/io.h b/arch/tile/include/asm/io.h
new file mode 100644
index 0000000..8c95bef
--- /dev/null
+++ b/arch/tile/include/asm/io.h
@@ -0,0 +1,279 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_IO_H
+#define _ASM_TILE_IO_H
+
+#include <linux/kernel.h>
+#include <linux/bug.h>
+#include <asm/page.h>
+
+#define IO_SPACE_LIMIT 0xfffffffful
+
+/*
+ * Convert a physical pointer to a virtual kernel pointer for /dev/mem
+ * access.
+ */
+#define xlate_dev_mem_ptr(p)	__va(p)
+
+/*
+ * Convert a virtual cached pointer to an uncached pointer.
+ */
+#define xlate_dev_kmem_ptr(p)	p
+
+/*
+ * Change "struct page" to physical address.
+ */
+#define page_to_phys(page)    ((dma_addr_t)page_to_pfn(page) << PAGE_SHIFT)
+
+/*
+ * Some places try to pass in an loff_t for PHYSADDR (?!), so we cast it to
+ * long before casting it to a pointer to avoid compiler warnings.
+ */
+#if CHIP_HAS_MMIO()
+extern void __iomem *ioremap(resource_size_t offset, unsigned long size);
+extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size,
+	pgprot_t pgprot);
+extern void iounmap(volatile void __iomem *addr);
+#else
+#define ioremap(physaddr, size)	((void __iomem *)(unsigned long)(physaddr))
+#define iounmap(addr)		((void)0)
+#endif
+
+#define ioremap_nocache(physaddr, size)		ioremap(physaddr, size)
+#define ioremap_writethrough(physaddr, size)	ioremap(physaddr, size)
+#define ioremap_fullcache(physaddr, size)	ioremap(physaddr, size)
+
+void __iomem *ioport_map(unsigned long port, unsigned int len);
+extern inline void ioport_unmap(void __iomem *addr) {}
+
+#define mmiowb()
+
+/* Conversion between virtual and physical mappings.  */
+#define mm_ptov(addr)		((void *)phys_to_virt(addr))
+#define mm_vtop(addr)		((unsigned long)virt_to_phys(addr))
+
+#ifdef CONFIG_PCI
+
+extern u8 _tile_readb(unsigned long addr);
+extern u16 _tile_readw(unsigned long addr);
+extern u32 _tile_readl(unsigned long addr);
+extern u64 _tile_readq(unsigned long addr);
+extern void _tile_writeb(u8  val, unsigned long addr);
+extern void _tile_writew(u16 val, unsigned long addr);
+extern void _tile_writel(u32 val, unsigned long addr);
+extern void _tile_writeq(u64 val, unsigned long addr);
+
+#else
+
+/*
+ * The Tile architecture does not support IOMEM unless PCI is enabled.
+ * Unfortunately we can't yet simply not declare these methods,
+ * since some generic code that compiles into the kernel, but
+ * we never run, uses them unconditionally.
+ */
+
+static inline int iomem_panic(void)
+{
+	panic("readb/writeb and friends do not exist on tile without PCI");
+	return 0;
+}
+
+static inline u8 _tile_readb(unsigned long addr)
+{
+	return iomem_panic();
+}
+
+static inline u16 _tile_readw(unsigned long addr)
+{
+	return iomem_panic();
+}
+
+static inline u32 _tile_readl(unsigned long addr)
+{
+	return iomem_panic();
+}
+
+static inline u64 _tile_readq(unsigned long addr)
+{
+	return iomem_panic();
+}
+
+static inline void _tile_writeb(u8  val, unsigned long addr)
+{
+	iomem_panic();
+}
+
+static inline void _tile_writew(u16 val, unsigned long addr)
+{
+	iomem_panic();
+}
+
+static inline void _tile_writel(u32 val, unsigned long addr)
+{
+	iomem_panic();
+}
+
+static inline void _tile_writeq(u64 val, unsigned long addr)
+{
+	iomem_panic();
+}
+
+#endif
+
+#define readb(addr) _tile_readb((unsigned long)addr)
+#define readw(addr) _tile_readw((unsigned long)addr)
+#define readl(addr) _tile_readl((unsigned long)addr)
+#define readq(addr) _tile_readq((unsigned long)addr)
+#define writeb(val, addr) _tile_writeb(val, (unsigned long)addr)
+#define writew(val, addr) _tile_writew(val, (unsigned long)addr)
+#define writel(val, addr) _tile_writel(val, (unsigned long)addr)
+#define writeq(val, addr) _tile_writeq(val, (unsigned long)addr)
+
+#define __raw_readb readb
+#define __raw_readw readw
+#define __raw_readl readl
+#define __raw_readq readq
+#define __raw_writeb writeb
+#define __raw_writew writew
+#define __raw_writel writel
+#define __raw_writeq writeq
+
+#define readb_relaxed readb
+#define readw_relaxed readw
+#define readl_relaxed readl
+#define readq_relaxed readq
+
+#define ioread8 readb
+#define ioread16 readw
+#define ioread32 readl
+#define ioread64 readq
+#define iowrite8 writeb
+#define iowrite16 writew
+#define iowrite32 writel
+#define iowrite64 writeq
+
+static inline void *memcpy_fromio(void *dst, void *src, int len)
+{
+	int x;
+	BUG_ON((unsigned long)src & 0x3);
+	for (x = 0; x < len; x += 4)
+		*(u32 *)(dst + x) = readl(src + x);
+	return dst;
+}
+
+static inline void *memcpy_toio(void *dst, void *src, int len)
+{
+	int x;
+	BUG_ON((unsigned long)dst & 0x3);
+	for (x = 0; x < len; x += 4)
+		writel(*(u32 *)(src + x), dst + x);
+	return dst;
+}
+
+/*
+ * The Tile architecture does not support IOPORT, even with PCI.
+ * Unfortunately we can't yet simply not declare these methods,
+ * since some generic code that compiles into the kernel, but
+ * we never run, uses them unconditionally.
+ */
+
+static inline int ioport_panic(void)
+{
+	panic("inb/outb and friends do not exist on tile");
+	return 0;
+}
+
+static inline u8 inb(unsigned long addr)
+{
+	return ioport_panic();
+}
+
+static inline u16 inw(unsigned long addr)
+{
+	return ioport_panic();
+}
+
+static inline u32 inl(unsigned long addr)
+{
+	return ioport_panic();
+}
+
+static inline void outb(u8 b, unsigned long addr)
+{
+	ioport_panic();
+}
+
+static inline void outw(u16 b, unsigned long addr)
+{
+	ioport_panic();
+}
+
+static inline void outl(u32 b, unsigned long addr)
+{
+	ioport_panic();
+}
+
+#define inb_p(addr)	inb(addr)
+#define inw_p(addr)	inw(addr)
+#define inl_p(addr)	inl(addr)
+#define outb_p(x, addr)	outb((x), (addr))
+#define outw_p(x, addr)	outw((x), (addr))
+#define outl_p(x, addr)	outl((x), (addr))
+
+static inline void insb(unsigned long addr, void *buffer, int count)
+{
+	ioport_panic();
+}
+
+static inline void insw(unsigned long addr, void *buffer, int count)
+{
+	ioport_panic();
+}
+
+static inline void insl(unsigned long addr, void *buffer, int count)
+{
+	ioport_panic();
+}
+
+static inline void outsb(unsigned long addr, const void *buffer, int count)
+{
+	ioport_panic();
+}
+
+static inline void outsw(unsigned long addr, const void *buffer, int count)
+{
+	ioport_panic();
+}
+
+static inline void outsl(unsigned long addr, const void *buffer, int count)
+{
+	ioport_panic();
+}
+
+#define ioread8_rep(p, dst, count) \
+	insb((unsigned long) (p), (dst), (count))
+#define ioread16_rep(p, dst, count) \
+	insw((unsigned long) (p), (dst), (count))
+#define ioread32_rep(p, dst, count) \
+	insl((unsigned long) (p), (dst), (count))
+
+#define iowrite8_rep(p, src, count) \
+	outsb((unsigned long) (p), (src), (count))
+#define iowrite16_rep(p, src, count) \
+	outsw((unsigned long) (p), (src), (count))
+#define iowrite32_rep(p, src, count) \
+	outsl((unsigned long) (p), (src), (count))
+
+#endif /* _ASM_TILE_IO_H */
diff --git a/arch/tile/include/asm/ioctl.h b/arch/tile/include/asm/ioctl.h
new file mode 100644
index 0000000..b279fe0
--- /dev/null
+++ b/arch/tile/include/asm/ioctl.h
@@ -0,0 +1 @@
+#include <asm-generic/ioctl.h>
diff --git a/arch/tile/include/asm/ioctls.h b/arch/tile/include/asm/ioctls.h
new file mode 100644
index 0000000..ec34c76
--- /dev/null
+++ b/arch/tile/include/asm/ioctls.h
@@ -0,0 +1 @@
+#include <asm-generic/ioctls.h>
diff --git a/arch/tile/include/asm/ipc.h b/arch/tile/include/asm/ipc.h
new file mode 100644
index 0000000..a46e3d9
--- /dev/null
+++ b/arch/tile/include/asm/ipc.h
@@ -0,0 +1 @@
+#include <asm-generic/ipc.h>
diff --git a/arch/tile/include/asm/ipcbuf.h b/arch/tile/include/asm/ipcbuf.h
new file mode 100644
index 0000000..84c7e51
--- /dev/null
+++ b/arch/tile/include/asm/ipcbuf.h
@@ -0,0 +1 @@
+#include <asm-generic/ipcbuf.h>
diff --git a/arch/tile/include/asm/irq.h b/arch/tile/include/asm/irq.h
new file mode 100644
index 0000000..9be1f84
--- /dev/null
+++ b/arch/tile/include/asm/irq.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_IRQ_H
+#define _ASM_TILE_IRQ_H
+
+#include <linux/hardirq.h>
+
+/* The hypervisor interface provides 32 IRQs. */
+#define NR_IRQS 32
+
+/* IRQ numbers used for linux IPIs. */
+#define IRQ_RESCHEDULE 1
+
+/* The HV interrupt state object. */
+DECLARE_PER_CPU(HV_IntrState, dev_intr_state);
+
+void ack_bad_irq(unsigned int irq);
+
+/*
+ * Paravirtualized drivers should call this when their init calls
+ * discover a valid HV IRQ.
+ */
+void tile_irq_activate(unsigned int irq);
+
+#endif /* _ASM_TILE_IRQ_H */
diff --git a/arch/tile/include/asm/irq_regs.h b/arch/tile/include/asm/irq_regs.h
new file mode 100644
index 0000000..3dd9c0b
--- /dev/null
+++ b/arch/tile/include/asm/irq_regs.h
@@ -0,0 +1 @@
+#include <asm-generic/irq_regs.h>
diff --git a/arch/tile/include/asm/irqflags.h b/arch/tile/include/asm/irqflags.h
new file mode 100644
index 0000000..cf5bffd
--- /dev/null
+++ b/arch/tile/include/asm/irqflags.h
@@ -0,0 +1,267 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_IRQFLAGS_H
+#define _ASM_TILE_IRQFLAGS_H
+
+#include <asm/processor.h>
+#include <arch/interrupts.h>
+#include <arch/chip.h>
+
+/*
+ * The set of interrupts we want to allow when interrupts are nominally
+ * disabled.  The remainder are effectively "NMI" interrupts from
+ * the point of view of the generic Linux code.  Note that synchronous
+ * interrupts (aka "non-queued") are not blocked by the mask in any case.
+ */
+#if CHIP_HAS_AUX_PERF_COUNTERS()
+#define LINUX_MASKABLE_INTERRUPTS \
+	(~(INT_MASK(INT_PERF_COUNT) | INT_MASK(INT_AUX_PERF_COUNT)))
+#else
+#define LINUX_MASKABLE_INTERRUPTS \
+	(~(INT_MASK(INT_PERF_COUNT)))
+#endif
+
+#ifndef __ASSEMBLY__
+
+/* NOTE: we can't include <linux/percpu.h> due to #include dependencies. */
+#include <asm/percpu.h>
+#include <arch/spr_def.h>
+
+/* Set and clear kernel interrupt masks. */
+#if CHIP_HAS_SPLIT_INTR_MASK()
+#if INT_PERF_COUNT < 32 || INT_AUX_PERF_COUNT < 32 || INT_MEM_ERROR >= 32
+# error Fix assumptions about which word various interrupts are in
+#endif
+#define interrupt_mask_set(n) do { \
+	int __n = (n); \
+	int __mask = 1 << (__n & 0x1f); \
+	if (__n < 32) \
+		__insn_mtspr(SPR_INTERRUPT_MASK_SET_1_0, __mask); \
+	else \
+		__insn_mtspr(SPR_INTERRUPT_MASK_SET_1_1, __mask); \
+} while (0)
+#define interrupt_mask_reset(n) do { \
+	int __n = (n); \
+	int __mask = 1 << (__n & 0x1f); \
+	if (__n < 32) \
+		__insn_mtspr(SPR_INTERRUPT_MASK_RESET_1_0, __mask); \
+	else \
+		__insn_mtspr(SPR_INTERRUPT_MASK_RESET_1_1, __mask); \
+} while (0)
+#define interrupt_mask_check(n) ({ \
+	int __n = (n); \
+	(((__n < 32) ? \
+	 __insn_mfspr(SPR_INTERRUPT_MASK_1_0) : \
+	 __insn_mfspr(SPR_INTERRUPT_MASK_1_1)) \
+	  >> (__n & 0x1f)) & 1; \
+})
+#define interrupt_mask_set_mask(mask) do { \
+	unsigned long long __m = (mask); \
+	__insn_mtspr(SPR_INTERRUPT_MASK_SET_1_0, (unsigned long)(__m)); \
+	__insn_mtspr(SPR_INTERRUPT_MASK_SET_1_1, (unsigned long)(__m>>32)); \
+} while (0)
+#define interrupt_mask_reset_mask(mask) do { \
+	unsigned long long __m = (mask); \
+	__insn_mtspr(SPR_INTERRUPT_MASK_RESET_1_0, (unsigned long)(__m)); \
+	__insn_mtspr(SPR_INTERRUPT_MASK_RESET_1_1, (unsigned long)(__m>>32)); \
+} while (0)
+#else
+#define interrupt_mask_set(n) \
+	__insn_mtspr(SPR_INTERRUPT_MASK_SET_1, (1UL << (n)))
+#define interrupt_mask_reset(n) \
+	__insn_mtspr(SPR_INTERRUPT_MASK_RESET_1, (1UL << (n)))
+#define interrupt_mask_check(n) \
+	((__insn_mfspr(SPR_INTERRUPT_MASK_1) >> (n)) & 1)
+#define interrupt_mask_set_mask(mask) \
+	__insn_mtspr(SPR_INTERRUPT_MASK_SET_1, (mask))
+#define interrupt_mask_reset_mask(mask) \
+	__insn_mtspr(SPR_INTERRUPT_MASK_RESET_1, (mask))
+#endif
+
+/*
+ * The set of interrupts we want active if irqs are enabled.
+ * Note that in particular, the tile timer interrupt comes and goes
+ * from this set, since we have no other way to turn off the timer.
+ * Likewise, INTCTRL_1 is removed and re-added during device
+ * interrupts, as is the the hardwall UDN_FIREWALL interrupt.
+ * We use a low bit (MEM_ERROR) as our sentinel value and make sure it
+ * is always claimed as an "active interrupt" so we can query that bit
+ * to know our current state.
+ */
+DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask);
+#define INITIAL_INTERRUPTS_ENABLED INT_MASK(INT_MEM_ERROR)
+
+/* Disable interrupts. */
+#define raw_local_irq_disable() \
+	interrupt_mask_set_mask(LINUX_MASKABLE_INTERRUPTS)
+
+/* Disable all interrupts, including NMIs. */
+#define raw_local_irq_disable_all() \
+	interrupt_mask_set_mask(-1UL)
+
+/* Re-enable all maskable interrupts. */
+#define raw_local_irq_enable() \
+	interrupt_mask_reset_mask(__get_cpu_var(interrupts_enabled_mask))
+
+/* Disable or enable interrupts based on flag argument. */
+#define raw_local_irq_restore(disabled) do { \
+	if (disabled) \
+		raw_local_irq_disable(); \
+	else \
+		raw_local_irq_enable(); \
+} while (0)
+
+/* Return true if "flags" argument means interrupts are disabled. */
+#define raw_irqs_disabled_flags(flags) ((flags) != 0)
+
+/* Return true if interrupts are currently disabled. */
+#define raw_irqs_disabled() interrupt_mask_check(INT_MEM_ERROR)
+
+/* Save whether interrupts are currently disabled. */
+#define raw_local_save_flags(flags) ((flags) = raw_irqs_disabled())
+
+/* Save whether interrupts are currently disabled, then disable them. */
+#define raw_local_irq_save(flags) \
+	do { raw_local_save_flags(flags); raw_local_irq_disable(); } while (0)
+
+/* Prevent the given interrupt from being enabled next time we enable irqs. */
+#define raw_local_irq_mask(interrupt) \
+	(__get_cpu_var(interrupts_enabled_mask) &= ~INT_MASK(interrupt))
+
+/* Prevent the given interrupt from being enabled immediately. */
+#define raw_local_irq_mask_now(interrupt) do { \
+	raw_local_irq_mask(interrupt); \
+	interrupt_mask_set(interrupt); \
+} while (0)
+
+/* Allow the given interrupt to be enabled next time we enable irqs. */
+#define raw_local_irq_unmask(interrupt) \
+	(__get_cpu_var(interrupts_enabled_mask) |= INT_MASK(interrupt))
+
+/* Allow the given interrupt to be enabled immediately, if !irqs_disabled. */
+#define raw_local_irq_unmask_now(interrupt) do { \
+	raw_local_irq_unmask(interrupt); \
+	if (!irqs_disabled()) \
+		interrupt_mask_reset(interrupt); \
+} while (0)
+
+#else /* __ASSEMBLY__ */
+
+/* We provide a somewhat more restricted set for assembly. */
+
+#ifdef __tilegx__
+
+#if INT_MEM_ERROR != 0
+# error Fix IRQ_DISABLED() macro
+#endif
+
+/* Return 0 or 1 to indicate whether interrupts are currently disabled. */
+#define IRQS_DISABLED(tmp)					\
+	mfspr   tmp, INTERRUPT_MASK_1;				\
+	andi    tmp, tmp, 1
+
+/* Load up a pointer to &interrupts_enabled_mask. */
+#define GET_INTERRUPTS_ENABLED_MASK_PTR(reg)			\
+	moveli reg, hw2_last(interrupts_enabled_mask); \
+	shl16insli reg, reg, hw1(interrupts_enabled_mask); \
+	shl16insli reg, reg, hw0(interrupts_enabled_mask); \
+	add     reg, reg, tp
+
+/* Disable interrupts. */
+#define IRQ_DISABLE(tmp0, tmp1)					\
+	moveli  tmp0, hw2_last(LINUX_MASKABLE_INTERRUPTS);	\
+	shl16insli tmp0, tmp0, hw1(LINUX_MASKABLE_INTERRUPTS);	\
+	shl16insli tmp0, tmp0, hw0(LINUX_MASKABLE_INTERRUPTS);	\
+	mtspr   INTERRUPT_MASK_SET_1, tmp0
+
+/* Disable ALL synchronous interrupts (used by NMI entry). */
+#define IRQ_DISABLE_ALL(tmp)					\
+	movei   tmp, -1;					\
+	mtspr   INTERRUPT_MASK_SET_1, tmp
+
+/* Enable interrupts. */
+#define IRQ_ENABLE(tmp0, tmp1)					\
+	GET_INTERRUPTS_ENABLED_MASK_PTR(tmp0);			\
+	ld      tmp0, tmp0;					\
+	mtspr   INTERRUPT_MASK_RESET_1, tmp0
+
+#else /* !__tilegx__ */
+
+/*
+ * Return 0 or 1 to indicate whether interrupts are currently disabled.
+ * Note that it's important that we use a bit from the "low" mask word,
+ * since when we are enabling, that is the word we write first, so if we
+ * are interrupted after only writing half of the mask, the interrupt
+ * handler will correctly observe that we have interrupts enabled, and
+ * will enable interrupts itself on return from the interrupt handler
+ * (making the original code's write of the "high" mask word idempotent).
+ */
+#define IRQS_DISABLED(tmp)					\
+	mfspr   tmp, INTERRUPT_MASK_1_0;			\
+	shri    tmp, tmp, INT_MEM_ERROR;			\
+	andi    tmp, tmp, 1
+
+/* Load up a pointer to &interrupts_enabled_mask. */
+#define GET_INTERRUPTS_ENABLED_MASK_PTR(reg)			\
+	moveli  reg, lo16(interrupts_enabled_mask);	\
+	auli    reg, reg, ha16(interrupts_enabled_mask);\
+	add     reg, reg, tp
+
+/* Disable interrupts. */
+#define IRQ_DISABLE(tmp0, tmp1)					\
+	{							\
+	 movei  tmp0, -1;					\
+	 moveli tmp1, lo16(LINUX_MASKABLE_INTERRUPTS)		\
+	};							\
+	{							\
+	 mtspr  INTERRUPT_MASK_SET_1_0, tmp0;			\
+	 auli   tmp1, tmp1, ha16(LINUX_MASKABLE_INTERRUPTS)	\
+	};							\
+	mtspr   INTERRUPT_MASK_SET_1_1, tmp1
+
+/* Disable ALL synchronous interrupts (used by NMI entry). */
+#define IRQ_DISABLE_ALL(tmp)					\
+	movei   tmp, -1;					\
+	mtspr   INTERRUPT_MASK_SET_1_0, tmp;			\
+	mtspr   INTERRUPT_MASK_SET_1_1, tmp
+
+/* Enable interrupts. */
+#define IRQ_ENABLE(tmp0, tmp1)					\
+	GET_INTERRUPTS_ENABLED_MASK_PTR(tmp0);			\
+	{							\
+	 lw     tmp0, tmp0;					\
+	 addi   tmp1, tmp0, 4					\
+	};							\
+	lw      tmp1, tmp1;					\
+	mtspr   INTERRUPT_MASK_RESET_1_0, tmp0;			\
+	mtspr   INTERRUPT_MASK_RESET_1_1, tmp1
+#endif
+
+/*
+ * Do the CPU's IRQ-state tracing from assembly code. We call a
+ * C function, but almost everywhere we do, we don't mind clobbering
+ * all the caller-saved registers.
+ */
+#ifdef CONFIG_TRACE_IRQFLAGS
+# define TRACE_IRQS_ON  jal trace_hardirqs_on
+# define TRACE_IRQS_OFF jal trace_hardirqs_off
+#else
+# define TRACE_IRQS_ON
+# define TRACE_IRQS_OFF
+#endif
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_TILE_IRQFLAGS_H */
diff --git a/arch/tile/include/asm/kdebug.h b/arch/tile/include/asm/kdebug.h
new file mode 100644
index 0000000..6ece1b0
--- /dev/null
+++ b/arch/tile/include/asm/kdebug.h
@@ -0,0 +1 @@
+#include <asm-generic/kdebug.h>
diff --git a/arch/tile/include/asm/kexec.h b/arch/tile/include/asm/kexec.h
new file mode 100644
index 0000000..c11a6cc
--- /dev/null
+++ b/arch/tile/include/asm/kexec.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ * based on kexec.h from other architectures in linux-2.6.18
+ */
+
+#ifndef _ASM_TILE_KEXEC_H
+#define _ASM_TILE_KEXEC_H
+
+#include <asm/page.h>
+
+/* Maximum physical address we can use pages from. */
+#define KEXEC_SOURCE_MEMORY_LIMIT TASK_SIZE
+/* Maximum address we can reach in physical address mode. */
+#define KEXEC_DESTINATION_MEMORY_LIMIT TASK_SIZE
+/* Maximum address we can use for the control code buffer. */
+#define KEXEC_CONTROL_MEMORY_LIMIT TASK_SIZE
+
+#define KEXEC_CONTROL_PAGE_SIZE	PAGE_SIZE
+
+/*
+ * We don't bother to provide a unique identifier, since we can only
+ * reboot with a single type of kernel image anyway.
+ */
+#define KEXEC_ARCH KEXEC_ARCH_DEFAULT
+
+/* Use the tile override for the page allocator. */
+struct page *kimage_alloc_pages_arch(gfp_t gfp_mask, unsigned int order);
+#define kimage_alloc_pages_arch kimage_alloc_pages_arch
+
+#define MAX_NOTE_BYTES 1024
+
+/* Defined in arch/tile/kernel/relocate_kernel.S */
+extern const unsigned char relocate_new_kernel[];
+extern const unsigned long relocate_new_kernel_size;
+extern void relocate_new_kernel_end(void);
+
+/* Provide a dummy definition to avoid build failures. */
+static inline void crash_setup_regs(struct pt_regs *n, struct pt_regs *o)
+{
+}
+
+#endif /* _ASM_TILE_KEXEC_H */
diff --git a/arch/tile/include/asm/kmap_types.h b/arch/tile/include/asm/kmap_types.h
new file mode 100644
index 0000000..1480106
--- /dev/null
+++ b/arch/tile/include/asm/kmap_types.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_KMAP_TYPES_H
+#define _ASM_TILE_KMAP_TYPES_H
+
+/*
+ * In TILE Linux each set of four of these uses another 16MB chunk of
+ * address space, given 64 tiles and 64KB pages, so we only enable
+ * ones that are required by the kernel configuration.
+ */
+enum km_type {
+	KM_BOUNCE_READ,
+	KM_SKB_SUNRPC_DATA,
+	KM_SKB_DATA_SOFTIRQ,
+	KM_USER0,
+	KM_USER1,
+	KM_BIO_SRC_IRQ,
+	KM_IRQ0,
+	KM_IRQ1,
+	KM_SOFTIRQ0,
+	KM_SOFTIRQ1,
+	KM_MEMCPY0,
+	KM_MEMCPY1,
+#if defined(CONFIG_HIGHPTE)
+	KM_PTE0,
+	KM_PTE1,
+#endif
+	KM_TYPE_NR
+};
+
+#endif /* _ASM_TILE_KMAP_TYPES_H */
diff --git a/arch/tile/include/asm/linkage.h b/arch/tile/include/asm/linkage.h
new file mode 100644
index 0000000..e121c39
--- /dev/null
+++ b/arch/tile/include/asm/linkage.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_LINKAGE_H
+#define _ASM_TILE_LINKAGE_H
+
+#include <feedback.h>
+
+#define __ALIGN .align 8
+
+/*
+ * The STD_ENTRY and STD_ENDPROC macros put the function in a
+ * self-named .text.foo section, and if linker feedback collection
+ * is enabled, add a suitable call to the feedback collection code.
+ * STD_ENTRY_SECTION lets you specify a non-standard section name.
+ */
+
+#define STD_ENTRY(name) \
+  .pushsection .text.##name, "ax"; \
+  ENTRY(name); \
+  FEEDBACK_ENTER(name)
+
+#define STD_ENTRY_SECTION(name, section) \
+  .pushsection section, "ax"; \
+  ENTRY(name); \
+  FEEDBACK_ENTER_EXPLICIT(name, section, .Lend_##name - name)
+
+#define STD_ENDPROC(name) \
+  ENDPROC(name); \
+  .Lend_##name:; \
+  .popsection
+
+/* Create a file-static function entry set up for feedback gathering. */
+#define STD_ENTRY_LOCAL(name) \
+  .pushsection .text.##name, "ax"; \
+  ALIGN; \
+  name:; \
+  FEEDBACK_ENTER(name)
+
+#endif /* _ASM_TILE_LINKAGE_H */
diff --git a/arch/tile/include/asm/local.h b/arch/tile/include/asm/local.h
new file mode 100644
index 0000000..c11c530
--- /dev/null
+++ b/arch/tile/include/asm/local.h
@@ -0,0 +1 @@
+#include <asm-generic/local.h>
diff --git a/arch/tile/include/asm/memprof.h b/arch/tile/include/asm/memprof.h
new file mode 100644
index 0000000..359949b
--- /dev/null
+++ b/arch/tile/include/asm/memprof.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ * The hypervisor's memory controller profiling infrastructure allows
+ * the programmer to find out what fraction of the available memory
+ * bandwidth is being consumed at each memory controller.  The
+ * profiler provides start, stop, and clear operations to allows
+ * profiling over a specific time window, as well as an interface for
+ * reading the most recent profile values.
+ *
+ * This header declares IOCTL codes necessary to control memprof.
+ */
+#ifndef _ASM_TILE_MEMPROF_H
+#define _ASM_TILE_MEMPROF_H
+
+#include <linux/ioctl.h>
+
+#define MEMPROF_IOCTL_TYPE 0xB4
+#define MEMPROF_IOCTL_START _IO(MEMPROF_IOCTL_TYPE, 0)
+#define MEMPROF_IOCTL_STOP _IO(MEMPROF_IOCTL_TYPE, 1)
+#define MEMPROF_IOCTL_CLEAR _IO(MEMPROF_IOCTL_TYPE, 2)
+
+#endif /* _ASM_TILE_MEMPROF_H */
diff --git a/arch/tile/include/asm/mman.h b/arch/tile/include/asm/mman.h
new file mode 100644
index 0000000..4c6811e
--- /dev/null
+++ b/arch/tile/include/asm/mman.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_MMAN_H
+#define _ASM_TILE_MMAN_H
+
+#include <asm-generic/mman-common.h>
+#include <arch/chip.h>
+
+/* Standard Linux flags */
+
+#define MAP_POPULATE	0x0040		/* populate (prefault) pagetables */
+#define MAP_NONBLOCK	0x0080		/* do not block on IO */
+#define MAP_GROWSDOWN	0x0100		/* stack-like segment */
+#define MAP_LOCKED	0x0200		/* pages are locked */
+#define MAP_NORESERVE	0x0400		/* don't check for reservations */
+#define MAP_DENYWRITE	0x0800		/* ETXTBSY */
+#define MAP_EXECUTABLE	0x1000		/* mark it as an executable */
+#define MAP_HUGETLB	0x4000		/* create a huge page mapping */
+
+
+/*
+ * Flags for mlockall
+ */
+#define MCL_CURRENT	1		/* lock all current mappings */
+#define MCL_FUTURE	2		/* lock all future mappings */
+
+
+#endif /* _ASM_TILE_MMAN_H */
diff --git a/arch/tile/include/asm/mmu.h b/arch/tile/include/asm/mmu.h
new file mode 100644
index 0000000..92f94c7
--- /dev/null
+++ b/arch/tile/include/asm/mmu.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_MMU_H
+#define _ASM_TILE_MMU_H
+
+/* Capture any arch- and mm-specific information. */
+struct mm_context {
+	/*
+	 * Written under the mmap_sem semaphore; read without the
+	 * semaphore but atomically, but it is conservatively set.
+	 */
+	unsigned int priority_cached;
+};
+
+typedef struct mm_context mm_context_t;
+
+void leave_mm(int cpu);
+
+#endif /* _ASM_TILE_MMU_H */
diff --git a/arch/tile/include/asm/mmu_context.h b/arch/tile/include/asm/mmu_context.h
new file mode 100644
index 0000000..9bc0d07
--- /dev/null
+++ b/arch/tile/include/asm/mmu_context.h
@@ -0,0 +1,131 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_MMU_CONTEXT_H
+#define _ASM_TILE_MMU_CONTEXT_H
+
+#include <linux/smp.h>
+#include <asm/setup.h>
+#include <asm/page.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+#include <asm/homecache.h>
+#include <asm-generic/mm_hooks.h>
+
+static inline int
+init_new_context(struct task_struct *tsk, struct mm_struct *mm)
+{
+	return 0;
+}
+
+/* Note that arch/tile/kernel/head.S also calls hv_install_context() */
+static inline void __install_page_table(pgd_t *pgdir, int asid, pgprot_t prot)
+{
+	/* FIXME: DIRECTIO should not always be set. FIXME. */
+	int rc = hv_install_context(__pa(pgdir), prot, asid, HV_CTX_DIRECTIO);
+	if (rc < 0)
+		panic("hv_install_context failed: %d", rc);
+}
+
+static inline void install_page_table(pgd_t *pgdir, int asid)
+{
+	pte_t *ptep = virt_to_pte(NULL, (unsigned long)pgdir);
+	__install_page_table(pgdir, asid, *ptep);
+}
+
+/*
+ * "Lazy" TLB mode is entered when we are switching to a kernel task,
+ * which borrows the mm of the previous task.  The goal of this
+ * optimization is to avoid having to install a new page table.  On
+ * early x86 machines (where the concept originated) you couldn't do
+ * anything short of a full page table install for invalidation, so
+ * handling a remote TLB invalidate required doing a page table
+ * re-install.  Someone clearly decided that it was silly to keep
+ * doing this while in "lazy" TLB mode, so the optimization involves
+ * installing the swapper page table instead the first time one
+ * occurs, and clearing the cpu out of cpu_vm_mask, so the cpu running
+ * the kernel task doesn't need to take any more interrupts.  At that
+ * point it's then necessary to explicitly reinstall it when context
+ * switching back to the original mm.
+ *
+ * On Tile, we have to do a page-table install whenever DMA is enabled,
+ * so in that case lazy mode doesn't help anyway.  And more generally,
+ * we have efficient per-page TLB shootdown, and don't expect to spend
+ * that much time in kernel tasks in general, so just leaving the
+ * kernel task borrowing the old page table, but handling TLB
+ * shootdowns, is a reasonable thing to do.  And importantly, this
+ * lets us use the hypervisor's internal APIs for TLB shootdown, which
+ * means we don't have to worry about having TLB shootdowns blocked
+ * when Linux is disabling interrupts; see the page migration code for
+ * an example of where it's important for TLB shootdowns to complete
+ * even when interrupts are disabled at the Linux level.
+ */
+static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *t)
+{
+#if CHIP_HAS_TILE_DMA()
+	/*
+	 * We have to do an "identity" page table switch in order to
+	 * clear any pending DMA interrupts.
+	 */
+	if (current->thread.tile_dma_state.enabled)
+		install_page_table(mm->pgd, __get_cpu_var(current_asid));
+#endif
+}
+
+static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+			     struct task_struct *tsk)
+{
+	if (likely(prev != next)) {
+
+		int cpu = smp_processor_id();
+
+		/* Pick new ASID. */
+		int asid = __get_cpu_var(current_asid) + 1;
+		if (asid > max_asid) {
+			asid = min_asid;
+			local_flush_tlb();
+		}
+		__get_cpu_var(current_asid) = asid;
+
+		/* Clear cpu from the old mm, and set it in the new one. */
+		cpumask_clear_cpu(cpu, &prev->cpu_vm_mask);
+		cpumask_set_cpu(cpu, &next->cpu_vm_mask);
+
+		/* Re-load page tables */
+		install_page_table(next->pgd, asid);
+
+		/* See how we should set the red/black cache info */
+		check_mm_caching(prev, next);
+
+		/*
+		 * Since we're changing to a new mm, we have to flush
+		 * the icache in case some physical page now being mapped
+		 * has subsequently been repurposed and has new code.
+		 */
+		__flush_icache();
+
+	}
+}
+
+static inline void activate_mm(struct mm_struct *prev_mm,
+			       struct mm_struct *next_mm)
+{
+	switch_mm(prev_mm, next_mm, NULL);
+}
+
+#define destroy_context(mm)		do { } while (0)
+#define deactivate_mm(tsk, mm)          do { } while (0)
+
+#endif /* _ASM_TILE_MMU_CONTEXT_H */
diff --git a/arch/tile/include/asm/mmzone.h b/arch/tile/include/asm/mmzone.h
new file mode 100644
index 0000000..c6344c4
--- /dev/null
+++ b/arch/tile/include/asm/mmzone.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_MMZONE_H
+#define _ASM_TILE_MMZONE_H
+
+extern struct pglist_data node_data[];
+#define NODE_DATA(nid)	(&node_data[nid])
+
+extern void get_memcfg_numa(void);
+
+#ifdef CONFIG_DISCONTIGMEM
+
+#include <asm/page.h>
+
+/*
+ * Generally, memory ranges are always doled out by the hypervisor in
+ * fixed-size, power-of-two increments.  That would make computing the node
+ * very easy.  We could just take a couple high bits of the PA, which
+ * denote the memory shim, and we'd be done.  However, when we're doing
+ * memory striping, this may not be true; PAs with different high bit
+ * values might be in the same node.  Thus, we keep a lookup table to
+ * translate the high bits of the PFN to the node number.
+ */
+extern int highbits_to_node[];
+
+static inline int pfn_to_nid(unsigned long pfn)
+{
+	return highbits_to_node[__pfn_to_highbits(pfn)];
+}
+
+/*
+ * Following are macros that each numa implmentation must define.
+ */
+
+#define node_start_pfn(nid)	(NODE_DATA(nid)->node_start_pfn)
+#define node_end_pfn(nid)						\
+({									\
+	pg_data_t *__pgdat = NODE_DATA(nid);				\
+	__pgdat->node_start_pfn + __pgdat->node_spanned_pages;		\
+})
+
+#define kern_addr_valid(kaddr)	virt_addr_valid((void *)kaddr)
+
+static inline int pfn_valid(int pfn)
+{
+	int nid = pfn_to_nid(pfn);
+
+	if (nid >= 0)
+		return (pfn < node_end_pfn(nid));
+	return 0;
+}
+
+/* Information on the NUMA nodes that we compute early */
+extern unsigned long node_start_pfn[];
+extern unsigned long node_end_pfn[];
+extern unsigned long node_memmap_pfn[];
+extern unsigned long node_percpu_pfn[];
+extern unsigned long node_free_pfn[];
+#ifdef CONFIG_HIGHMEM
+extern unsigned long node_lowmem_end_pfn[];
+#endif
+#ifdef CONFIG_PCI
+extern unsigned long pci_reserve_start_pfn;
+extern unsigned long pci_reserve_end_pfn;
+#endif
+
+#endif /* CONFIG_DISCONTIGMEM */
+
+#endif /* _ASM_TILE_MMZONE_H */
diff --git a/arch/tile/include/asm/module.h b/arch/tile/include/asm/module.h
new file mode 100644
index 0000000..1e4b79f
--- /dev/null
+++ b/arch/tile/include/asm/module.h
@@ -0,0 +1 @@
+#include <asm-generic/module.h>
diff --git a/arch/tile/include/asm/msgbuf.h b/arch/tile/include/asm/msgbuf.h
new file mode 100644
index 0000000..809134c
--- /dev/null
+++ b/arch/tile/include/asm/msgbuf.h
@@ -0,0 +1 @@
+#include <asm-generic/msgbuf.h>
diff --git a/arch/tile/include/asm/mutex.h b/arch/tile/include/asm/mutex.h
new file mode 100644
index 0000000..ff6101a
--- /dev/null
+++ b/arch/tile/include/asm/mutex.h
@@ -0,0 +1 @@
+#include <asm-generic/mutex-dec.h>
diff --git a/arch/tile/include/asm/opcode-tile.h b/arch/tile/include/asm/opcode-tile.h
new file mode 100644
index 0000000..ba38959
--- /dev/null
+++ b/arch/tile/include/asm/opcode-tile.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_OPCODE_TILE_H
+#define _ASM_TILE_OPCODE_TILE_H
+
+#include <arch/chip.h>
+
+#if CHIP_WORD_SIZE() == 64
+#include <asm/opcode-tile_64.h>
+#else
+#include <asm/opcode-tile_32.h>
+#endif
+
+/* These definitions are not correct for TILE64, so just avoid them. */
+#undef TILE_ELF_MACHINE_CODE
+#undef TILE_ELF_NAME
+
+#endif /* _ASM_TILE_OPCODE_TILE_H */
diff --git a/arch/tile/include/asm/opcode-tile_32.h b/arch/tile/include/asm/opcode-tile_32.h
new file mode 100644
index 0000000..90f8dd3
--- /dev/null
+++ b/arch/tile/include/asm/opcode-tile_32.h
@@ -0,0 +1,1597 @@
+/* tile.h -- Header file for TILE opcode table
+   Copyright (C) 2005 Free Software Foundation, Inc.
+   Contributed by Tilera Corp. */
+
+#ifndef opcode_tile_h
+#define opcode_tile_h
+
+typedef unsigned long long tile_bundle_bits;
+
+
+enum
+{
+  TILE_MAX_OPERANDS = 5 /* mm */
+};
+
+typedef enum
+{
+  TILE_OPC_BPT,
+  TILE_OPC_INFO,
+  TILE_OPC_INFOL,
+  TILE_OPC_J,
+  TILE_OPC_JAL,
+  TILE_OPC_MOVE,
+  TILE_OPC_MOVE_SN,
+  TILE_OPC_MOVEI,
+  TILE_OPC_MOVEI_SN,
+  TILE_OPC_MOVELI,
+  TILE_OPC_MOVELI_SN,
+  TILE_OPC_MOVELIS,
+  TILE_OPC_PREFETCH,
+  TILE_OPC_ADD,
+  TILE_OPC_ADD_SN,
+  TILE_OPC_ADDB,
+  TILE_OPC_ADDB_SN,
+  TILE_OPC_ADDBS_U,
+  TILE_OPC_ADDBS_U_SN,
+  TILE_OPC_ADDH,
+  TILE_OPC_ADDH_SN,
+  TILE_OPC_ADDHS,
+  TILE_OPC_ADDHS_SN,
+  TILE_OPC_ADDI,
+  TILE_OPC_ADDI_SN,
+  TILE_OPC_ADDIB,
+  TILE_OPC_ADDIB_SN,
+  TILE_OPC_ADDIH,
+  TILE_OPC_ADDIH_SN,
+  TILE_OPC_ADDLI,
+  TILE_OPC_ADDLI_SN,
+  TILE_OPC_ADDLIS,
+  TILE_OPC_ADDS,
+  TILE_OPC_ADDS_SN,
+  TILE_OPC_ADIFFB_U,
+  TILE_OPC_ADIFFB_U_SN,
+  TILE_OPC_ADIFFH,
+  TILE_OPC_ADIFFH_SN,
+  TILE_OPC_AND,
+  TILE_OPC_AND_SN,
+  TILE_OPC_ANDI,
+  TILE_OPC_ANDI_SN,
+  TILE_OPC_AULI,
+  TILE_OPC_AVGB_U,
+  TILE_OPC_AVGB_U_SN,
+  TILE_OPC_AVGH,
+  TILE_OPC_AVGH_SN,
+  TILE_OPC_BBNS,
+  TILE_OPC_BBNS_SN,
+  TILE_OPC_BBNST,
+  TILE_OPC_BBNST_SN,
+  TILE_OPC_BBS,
+  TILE_OPC_BBS_SN,
+  TILE_OPC_BBST,
+  TILE_OPC_BBST_SN,
+  TILE_OPC_BGEZ,
+  TILE_OPC_BGEZ_SN,
+  TILE_OPC_BGEZT,
+  TILE_OPC_BGEZT_SN,
+  TILE_OPC_BGZ,
+  TILE_OPC_BGZ_SN,
+  TILE_OPC_BGZT,
+  TILE_OPC_BGZT_SN,
+  TILE_OPC_BITX,
+  TILE_OPC_BITX_SN,
+  TILE_OPC_BLEZ,
+  TILE_OPC_BLEZ_SN,
+  TILE_OPC_BLEZT,
+  TILE_OPC_BLEZT_SN,
+  TILE_OPC_BLZ,
+  TILE_OPC_BLZ_SN,
+  TILE_OPC_BLZT,
+  TILE_OPC_BLZT_SN,
+  TILE_OPC_BNZ,
+  TILE_OPC_BNZ_SN,
+  TILE_OPC_BNZT,
+  TILE_OPC_BNZT_SN,
+  TILE_OPC_BYTEX,
+  TILE_OPC_BYTEX_SN,
+  TILE_OPC_BZ,
+  TILE_OPC_BZ_SN,
+  TILE_OPC_BZT,
+  TILE_OPC_BZT_SN,
+  TILE_OPC_CLZ,
+  TILE_OPC_CLZ_SN,
+  TILE_OPC_CRC32_32,
+  TILE_OPC_CRC32_32_SN,
+  TILE_OPC_CRC32_8,
+  TILE_OPC_CRC32_8_SN,
+  TILE_OPC_CTZ,
+  TILE_OPC_CTZ_SN,
+  TILE_OPC_DRAIN,
+  TILE_OPC_DTLBPR,
+  TILE_OPC_DWORD_ALIGN,
+  TILE_OPC_DWORD_ALIGN_SN,
+  TILE_OPC_FINV,
+  TILE_OPC_FLUSH,
+  TILE_OPC_FNOP,
+  TILE_OPC_ICOH,
+  TILE_OPC_ILL,
+  TILE_OPC_INTHB,
+  TILE_OPC_INTHB_SN,
+  TILE_OPC_INTHH,
+  TILE_OPC_INTHH_SN,
+  TILE_OPC_INTLB,
+  TILE_OPC_INTLB_SN,
+  TILE_OPC_INTLH,
+  TILE_OPC_INTLH_SN,
+  TILE_OPC_INV,
+  TILE_OPC_IRET,
+  TILE_OPC_JALB,
+  TILE_OPC_JALF,
+  TILE_OPC_JALR,
+  TILE_OPC_JALRP,
+  TILE_OPC_JB,
+  TILE_OPC_JF,
+  TILE_OPC_JR,
+  TILE_OPC_JRP,
+  TILE_OPC_LB,
+  TILE_OPC_LB_SN,
+  TILE_OPC_LB_U,
+  TILE_OPC_LB_U_SN,
+  TILE_OPC_LBADD,
+  TILE_OPC_LBADD_SN,
+  TILE_OPC_LBADD_U,
+  TILE_OPC_LBADD_U_SN,
+  TILE_OPC_LH,
+  TILE_OPC_LH_SN,
+  TILE_OPC_LH_U,
+  TILE_OPC_LH_U_SN,
+  TILE_OPC_LHADD,
+  TILE_OPC_LHADD_SN,
+  TILE_OPC_LHADD_U,
+  TILE_OPC_LHADD_U_SN,
+  TILE_OPC_LNK,
+  TILE_OPC_LNK_SN,
+  TILE_OPC_LW,
+  TILE_OPC_LW_SN,
+  TILE_OPC_LW_NA,
+  TILE_OPC_LW_NA_SN,
+  TILE_OPC_LWADD,
+  TILE_OPC_LWADD_SN,
+  TILE_OPC_LWADD_NA,
+  TILE_OPC_LWADD_NA_SN,
+  TILE_OPC_MAXB_U,
+  TILE_OPC_MAXB_U_SN,
+  TILE_OPC_MAXH,
+  TILE_OPC_MAXH_SN,
+  TILE_OPC_MAXIB_U,
+  TILE_OPC_MAXIB_U_SN,
+  TILE_OPC_MAXIH,
+  TILE_OPC_MAXIH_SN,
+  TILE_OPC_MF,
+  TILE_OPC_MFSPR,
+  TILE_OPC_MINB_U,
+  TILE_OPC_MINB_U_SN,
+  TILE_OPC_MINH,
+  TILE_OPC_MINH_SN,
+  TILE_OPC_MINIB_U,
+  TILE_OPC_MINIB_U_SN,
+  TILE_OPC_MINIH,
+  TILE_OPC_MINIH_SN,
+  TILE_OPC_MM,
+  TILE_OPC_MNZ,
+  TILE_OPC_MNZ_SN,
+  TILE_OPC_MNZB,
+  TILE_OPC_MNZB_SN,
+  TILE_OPC_MNZH,
+  TILE_OPC_MNZH_SN,
+  TILE_OPC_MTSPR,
+  TILE_OPC_MULHH_SS,
+  TILE_OPC_MULHH_SS_SN,
+  TILE_OPC_MULHH_SU,
+  TILE_OPC_MULHH_SU_SN,
+  TILE_OPC_MULHH_UU,
+  TILE_OPC_MULHH_UU_SN,
+  TILE_OPC_MULHHA_SS,
+  TILE_OPC_MULHHA_SS_SN,
+  TILE_OPC_MULHHA_SU,
+  TILE_OPC_MULHHA_SU_SN,
+  TILE_OPC_MULHHA_UU,
+  TILE_OPC_MULHHA_UU_SN,
+  TILE_OPC_MULHHSA_UU,
+  TILE_OPC_MULHHSA_UU_SN,
+  TILE_OPC_MULHL_SS,
+  TILE_OPC_MULHL_SS_SN,
+  TILE_OPC_MULHL_SU,
+  TILE_OPC_MULHL_SU_SN,
+  TILE_OPC_MULHL_US,
+  TILE_OPC_MULHL_US_SN,
+  TILE_OPC_MULHL_UU,
+  TILE_OPC_MULHL_UU_SN,
+  TILE_OPC_MULHLA_SS,
+  TILE_OPC_MULHLA_SS_SN,
+  TILE_OPC_MULHLA_SU,
+  TILE_OPC_MULHLA_SU_SN,
+  TILE_OPC_MULHLA_US,
+  TILE_OPC_MULHLA_US_SN,
+  TILE_OPC_MULHLA_UU,
+  TILE_OPC_MULHLA_UU_SN,
+  TILE_OPC_MULHLSA_UU,
+  TILE_OPC_MULHLSA_UU_SN,
+  TILE_OPC_MULLL_SS,
+  TILE_OPC_MULLL_SS_SN,
+  TILE_OPC_MULLL_SU,
+  TILE_OPC_MULLL_SU_SN,
+  TILE_OPC_MULLL_UU,
+  TILE_OPC_MULLL_UU_SN,
+  TILE_OPC_MULLLA_SS,
+  TILE_OPC_MULLLA_SS_SN,
+  TILE_OPC_MULLLA_SU,
+  TILE_OPC_MULLLA_SU_SN,
+  TILE_OPC_MULLLA_UU,
+  TILE_OPC_MULLLA_UU_SN,
+  TILE_OPC_MULLLSA_UU,
+  TILE_OPC_MULLLSA_UU_SN,
+  TILE_OPC_MVNZ,
+  TILE_OPC_MVNZ_SN,
+  TILE_OPC_MVZ,
+  TILE_OPC_MVZ_SN,
+  TILE_OPC_MZ,
+  TILE_OPC_MZ_SN,
+  TILE_OPC_MZB,
+  TILE_OPC_MZB_SN,
+  TILE_OPC_MZH,
+  TILE_OPC_MZH_SN,
+  TILE_OPC_NAP,
+  TILE_OPC_NOP,
+  TILE_OPC_NOR,
+  TILE_OPC_NOR_SN,
+  TILE_OPC_OR,
+  TILE_OPC_OR_SN,
+  TILE_OPC_ORI,
+  TILE_OPC_ORI_SN,
+  TILE_OPC_PACKBS_U,
+  TILE_OPC_PACKBS_U_SN,
+  TILE_OPC_PACKHB,
+  TILE_OPC_PACKHB_SN,
+  TILE_OPC_PACKHS,
+  TILE_OPC_PACKHS_SN,
+  TILE_OPC_PACKLB,
+  TILE_OPC_PACKLB_SN,
+  TILE_OPC_PCNT,
+  TILE_OPC_PCNT_SN,
+  TILE_OPC_RL,
+  TILE_OPC_RL_SN,
+  TILE_OPC_RLI,
+  TILE_OPC_RLI_SN,
+  TILE_OPC_S1A,
+  TILE_OPC_S1A_SN,
+  TILE_OPC_S2A,
+  TILE_OPC_S2A_SN,
+  TILE_OPC_S3A,
+  TILE_OPC_S3A_SN,
+  TILE_OPC_SADAB_U,
+  TILE_OPC_SADAB_U_SN,
+  TILE_OPC_SADAH,
+  TILE_OPC_SADAH_SN,
+  TILE_OPC_SADAH_U,
+  TILE_OPC_SADAH_U_SN,
+  TILE_OPC_SADB_U,
+  TILE_OPC_SADB_U_SN,
+  TILE_OPC_SADH,
+  TILE_OPC_SADH_SN,
+  TILE_OPC_SADH_U,
+  TILE_OPC_SADH_U_SN,
+  TILE_OPC_SB,
+  TILE_OPC_SBADD,
+  TILE_OPC_SEQ,
+  TILE_OPC_SEQ_SN,
+  TILE_OPC_SEQB,
+  TILE_OPC_SEQB_SN,
+  TILE_OPC_SEQH,
+  TILE_OPC_SEQH_SN,
+  TILE_OPC_SEQI,
+  TILE_OPC_SEQI_SN,
+  TILE_OPC_SEQIB,
+  TILE_OPC_SEQIB_SN,
+  TILE_OPC_SEQIH,
+  TILE_OPC_SEQIH_SN,
+  TILE_OPC_SH,
+  TILE_OPC_SHADD,
+  TILE_OPC_SHL,
+  TILE_OPC_SHL_SN,
+  TILE_OPC_SHLB,
+  TILE_OPC_SHLB_SN,
+  TILE_OPC_SHLH,
+  TILE_OPC_SHLH_SN,
+  TILE_OPC_SHLI,
+  TILE_OPC_SHLI_SN,
+  TILE_OPC_SHLIB,
+  TILE_OPC_SHLIB_SN,
+  TILE_OPC_SHLIH,
+  TILE_OPC_SHLIH_SN,
+  TILE_OPC_SHR,
+  TILE_OPC_SHR_SN,
+  TILE_OPC_SHRB,
+  TILE_OPC_SHRB_SN,
+  TILE_OPC_SHRH,
+  TILE_OPC_SHRH_SN,
+  TILE_OPC_SHRI,
+  TILE_OPC_SHRI_SN,
+  TILE_OPC_SHRIB,
+  TILE_OPC_SHRIB_SN,
+  TILE_OPC_SHRIH,
+  TILE_OPC_SHRIH_SN,
+  TILE_OPC_SLT,
+  TILE_OPC_SLT_SN,
+  TILE_OPC_SLT_U,
+  TILE_OPC_SLT_U_SN,
+  TILE_OPC_SLTB,
+  TILE_OPC_SLTB_SN,
+  TILE_OPC_SLTB_U,
+  TILE_OPC_SLTB_U_SN,
+  TILE_OPC_SLTE,
+  TILE_OPC_SLTE_SN,
+  TILE_OPC_SLTE_U,
+  TILE_OPC_SLTE_U_SN,
+  TILE_OPC_SLTEB,
+  TILE_OPC_SLTEB_SN,
+  TILE_OPC_SLTEB_U,
+  TILE_OPC_SLTEB_U_SN,
+  TILE_OPC_SLTEH,
+  TILE_OPC_SLTEH_SN,
+  TILE_OPC_SLTEH_U,
+  TILE_OPC_SLTEH_U_SN,
+  TILE_OPC_SLTH,
+  TILE_OPC_SLTH_SN,
+  TILE_OPC_SLTH_U,
+  TILE_OPC_SLTH_U_SN,
+  TILE_OPC_SLTI,
+  TILE_OPC_SLTI_SN,
+  TILE_OPC_SLTI_U,
+  TILE_OPC_SLTI_U_SN,
+  TILE_OPC_SLTIB,
+  TILE_OPC_SLTIB_SN,
+  TILE_OPC_SLTIB_U,
+  TILE_OPC_SLTIB_U_SN,
+  TILE_OPC_SLTIH,
+  TILE_OPC_SLTIH_SN,
+  TILE_OPC_SLTIH_U,
+  TILE_OPC_SLTIH_U_SN,
+  TILE_OPC_SNE,
+  TILE_OPC_SNE_SN,
+  TILE_OPC_SNEB,
+  TILE_OPC_SNEB_SN,
+  TILE_OPC_SNEH,
+  TILE_OPC_SNEH_SN,
+  TILE_OPC_SRA,
+  TILE_OPC_SRA_SN,
+  TILE_OPC_SRAB,
+  TILE_OPC_SRAB_SN,
+  TILE_OPC_SRAH,
+  TILE_OPC_SRAH_SN,
+  TILE_OPC_SRAI,
+  TILE_OPC_SRAI_SN,
+  TILE_OPC_SRAIB,
+  TILE_OPC_SRAIB_SN,
+  TILE_OPC_SRAIH,
+  TILE_OPC_SRAIH_SN,
+  TILE_OPC_SUB,
+  TILE_OPC_SUB_SN,
+  TILE_OPC_SUBB,
+  TILE_OPC_SUBB_SN,
+  TILE_OPC_SUBBS_U,
+  TILE_OPC_SUBBS_U_SN,
+  TILE_OPC_SUBH,
+  TILE_OPC_SUBH_SN,
+  TILE_OPC_SUBHS,
+  TILE_OPC_SUBHS_SN,
+  TILE_OPC_SUBS,
+  TILE_OPC_SUBS_SN,
+  TILE_OPC_SW,
+  TILE_OPC_SWADD,
+  TILE_OPC_SWINT0,
+  TILE_OPC_SWINT1,
+  TILE_OPC_SWINT2,
+  TILE_OPC_SWINT3,
+  TILE_OPC_TBLIDXB0,
+  TILE_OPC_TBLIDXB0_SN,
+  TILE_OPC_TBLIDXB1,
+  TILE_OPC_TBLIDXB1_SN,
+  TILE_OPC_TBLIDXB2,
+  TILE_OPC_TBLIDXB2_SN,
+  TILE_OPC_TBLIDXB3,
+  TILE_OPC_TBLIDXB3_SN,
+  TILE_OPC_TNS,
+  TILE_OPC_TNS_SN,
+  TILE_OPC_WH64,
+  TILE_OPC_XOR,
+  TILE_OPC_XOR_SN,
+  TILE_OPC_XORI,
+  TILE_OPC_XORI_SN,
+  TILE_OPC_NONE
+} tile_mnemonic;
+
+/* 64-bit pattern for a { bpt ; nop } bundle. */
+#define TILE_BPT_BUNDLE 0x400b3cae70166000ULL
+
+
+#define TILE_ELF_MACHINE_CODE EM_TILEPRO
+
+#define TILE_ELF_NAME "elf32-tilepro"
+
+enum
+{
+  TILE_SN_MAX_OPERANDS = 6 /* route */
+};
+
+typedef enum
+{
+  TILE_SN_OPC_BZ,
+  TILE_SN_OPC_BNZ,
+  TILE_SN_OPC_JRR,
+  TILE_SN_OPC_FNOP,
+  TILE_SN_OPC_BLZ,
+  TILE_SN_OPC_NOP,
+  TILE_SN_OPC_MOVEI,
+  TILE_SN_OPC_MOVE,
+  TILE_SN_OPC_BGEZ,
+  TILE_SN_OPC_JR,
+  TILE_SN_OPC_BLEZ,
+  TILE_SN_OPC_BBNS,
+  TILE_SN_OPC_JALRR,
+  TILE_SN_OPC_BPT,
+  TILE_SN_OPC_JALR,
+  TILE_SN_OPC_SHR1,
+  TILE_SN_OPC_BGZ,
+  TILE_SN_OPC_BBS,
+  TILE_SN_OPC_SHL8II,
+  TILE_SN_OPC_ADDI,
+  TILE_SN_OPC_HALT,
+  TILE_SN_OPC_ROUTE,
+  TILE_SN_OPC_NONE
+} tile_sn_mnemonic;
+
+extern const unsigned char tile_sn_route_encode[6 * 6 * 6];
+extern const signed char tile_sn_route_decode[256][3];
+extern const char tile_sn_direction_names[6][5];
+extern const signed char tile_sn_dest_map[6][6];
+
+
+static __inline unsigned int
+get_BrOff_SN(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 0)) & 0x3ff);
+}
+
+static __inline unsigned int
+get_BrOff_X1(tile_bundle_bits n)
+{
+  return (((unsigned int)(n >> 43)) & 0x00007fff) |
+         (((unsigned int)(n >> 20)) & 0x00018000);
+}
+
+static __inline unsigned int
+get_BrType_X1(tile_bundle_bits n)
+{
+  return (((unsigned int)(n >> 31)) & 0xf);
+}
+
+static __inline unsigned int
+get_Dest_Imm8_X1(tile_bundle_bits n)
+{
+  return (((unsigned int)(n >> 31)) & 0x0000003f) |
+         (((unsigned int)(n >> 43)) & 0x000000c0);
+}
+
+static __inline unsigned int
+get_Dest_SN(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 2)) & 0x3);
+}
+
+static __inline unsigned int
+get_Dest_X0(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 0)) & 0x3f);
+}
+
+static __inline unsigned int
+get_Dest_X1(tile_bundle_bits n)
+{
+  return (((unsigned int)(n >> 31)) & 0x3f);
+}
+
+static __inline unsigned int
+get_Dest_Y0(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 0)) & 0x3f);
+}
+
+static __inline unsigned int
+get_Dest_Y1(tile_bundle_bits n)
+{
+  return (((unsigned int)(n >> 31)) & 0x3f);
+}
+
+static __inline unsigned int
+get_Imm16_X0(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 12)) & 0xffff);
+}
+
+static __inline unsigned int
+get_Imm16_X1(tile_bundle_bits n)
+{
+  return (((unsigned int)(n >> 43)) & 0xffff);
+}
+
+static __inline unsigned int
+get_Imm8_SN(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 0)) & 0xff);
+}
+
+static __inline unsigned int
+get_Imm8_X0(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 12)) & 0xff);
+}
+
+static __inline unsigned int
+get_Imm8_X1(tile_bundle_bits n)
+{
+  return (((unsigned int)(n >> 43)) & 0xff);
+}
+
+static __inline unsigned int
+get_Imm8_Y0(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 12)) & 0xff);
+}
+
+static __inline unsigned int
+get_Imm8_Y1(tile_bundle_bits n)
+{
+  return (((unsigned int)(n >> 43)) & 0xff);
+}
+
+static __inline unsigned int
+get_ImmOpcodeExtension_X0(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 20)) & 0x7f);
+}
+
+static __inline unsigned int
+get_ImmOpcodeExtension_X1(tile_bundle_bits n)
+{
+  return (((unsigned int)(n >> 51)) & 0x7f);
+}
+
+static __inline unsigned int
+get_ImmRROpcodeExtension_SN(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 8)) & 0x3);
+}
+
+static __inline unsigned int
+get_JOffLong_X1(tile_bundle_bits n)
+{
+  return (((unsigned int)(n >> 43)) & 0x00007fff) |
+         (((unsigned int)(n >> 20)) & 0x00018000) |
+         (((unsigned int)(n >> 14)) & 0x001e0000) |
+         (((unsigned int)(n >> 16)) & 0x07e00000) |
+         (((unsigned int)(n >> 31)) & 0x18000000);
+}
+
+static __inline unsigned int
+get_JOff_X1(tile_bundle_bits n)
+{
+  return (((unsigned int)(n >> 43)) & 0x00007fff) |
+         (((unsigned int)(n >> 20)) & 0x00018000) |
+         (((unsigned int)(n >> 14)) & 0x001e0000) |
+         (((unsigned int)(n >> 16)) & 0x07e00000) |
+         (((unsigned int)(n >> 31)) & 0x08000000);
+}
+
+static __inline unsigned int
+get_MF_Imm15_X1(tile_bundle_bits n)
+{
+  return (((unsigned int)(n >> 37)) & 0x00003fff) |
+         (((unsigned int)(n >> 44)) & 0x00004000);
+}
+
+static __inline unsigned int
+get_MMEnd_X0(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 18)) & 0x1f);
+}
+
+static __inline unsigned int
+get_MMEnd_X1(tile_bundle_bits n)
+{
+  return (((unsigned int)(n >> 49)) & 0x1f);
+}
+
+static __inline unsigned int
+get_MMStart_X0(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 23)) & 0x1f);
+}
+
+static __inline unsigned int
+get_MMStart_X1(tile_bundle_bits n)
+{
+  return (((unsigned int)(n >> 54)) & 0x1f);
+}
+
+static __inline unsigned int
+get_MT_Imm15_X1(tile_bundle_bits n)
+{
+  return (((unsigned int)(n >> 31)) & 0x0000003f) |
+         (((unsigned int)(n >> 37)) & 0x00003fc0) |
+         (((unsigned int)(n >> 44)) & 0x00004000);
+}
+
+static __inline unsigned int
+get_Mode(tile_bundle_bits n)
+{
+  return (((unsigned int)(n >> 63)) & 0x1);
+}
+
+static __inline unsigned int
+get_NoRegOpcodeExtension_SN(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 0)) & 0xf);
+}
+
+static __inline unsigned int
+get_Opcode_SN(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 10)) & 0x3f);
+}
+
+static __inline unsigned int
+get_Opcode_X0(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 28)) & 0x7);
+}
+
+static __inline unsigned int
+get_Opcode_X1(tile_bundle_bits n)
+{
+  return (((unsigned int)(n >> 59)) & 0xf);
+}
+
+static __inline unsigned int
+get_Opcode_Y0(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 27)) & 0xf);
+}
+
+static __inline unsigned int
+get_Opcode_Y1(tile_bundle_bits n)
+{
+  return (((unsigned int)(n >> 59)) & 0xf);
+}
+
+static __inline unsigned int
+get_Opcode_Y2(tile_bundle_bits n)
+{
+  return (((unsigned int)(n >> 56)) & 0x7);
+}
+
+static __inline unsigned int
+get_RROpcodeExtension_SN(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 4)) & 0xf);
+}
+
+static __inline unsigned int
+get_RRROpcodeExtension_X0(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 18)) & 0x1ff);
+}
+
+static __inline unsigned int
+get_RRROpcodeExtension_X1(tile_bundle_bits n)
+{
+  return (((unsigned int)(n >> 49)) & 0x1ff);
+}
+
+static __inline unsigned int
+get_RRROpcodeExtension_Y0(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 18)) & 0x3);
+}
+
+static __inline unsigned int
+get_RRROpcodeExtension_Y1(tile_bundle_bits n)
+{
+  return (((unsigned int)(n >> 49)) & 0x3);
+}
+
+static __inline unsigned int
+get_RouteOpcodeExtension_SN(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 0)) & 0x3ff);
+}
+
+static __inline unsigned int
+get_S_X0(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 27)) & 0x1);
+}
+
+static __inline unsigned int
+get_S_X1(tile_bundle_bits n)
+{
+  return (((unsigned int)(n >> 58)) & 0x1);
+}
+
+static __inline unsigned int
+get_ShAmt_X0(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 12)) & 0x1f);
+}
+
+static __inline unsigned int
+get_ShAmt_X1(tile_bundle_bits n)
+{
+  return (((unsigned int)(n >> 43)) & 0x1f);
+}
+
+static __inline unsigned int
+get_ShAmt_Y0(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 12)) & 0x1f);
+}
+
+static __inline unsigned int
+get_ShAmt_Y1(tile_bundle_bits n)
+{
+  return (((unsigned int)(n >> 43)) & 0x1f);
+}
+
+static __inline unsigned int
+get_SrcA_X0(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 6)) & 0x3f);
+}
+
+static __inline unsigned int
+get_SrcA_X1(tile_bundle_bits n)
+{
+  return (((unsigned int)(n >> 37)) & 0x3f);
+}
+
+static __inline unsigned int
+get_SrcA_Y0(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 6)) & 0x3f);
+}
+
+static __inline unsigned int
+get_SrcA_Y1(tile_bundle_bits n)
+{
+  return (((unsigned int)(n >> 37)) & 0x3f);
+}
+
+static __inline unsigned int
+get_SrcA_Y2(tile_bundle_bits n)
+{
+  return (((n >> 26)) & 0x00000001) |
+         (((unsigned int)(n >> 50)) & 0x0000003e);
+}
+
+static __inline unsigned int
+get_SrcBDest_Y2(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 20)) & 0x3f);
+}
+
+static __inline unsigned int
+get_SrcB_X0(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 12)) & 0x3f);
+}
+
+static __inline unsigned int
+get_SrcB_X1(tile_bundle_bits n)
+{
+  return (((unsigned int)(n >> 43)) & 0x3f);
+}
+
+static __inline unsigned int
+get_SrcB_Y0(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 12)) & 0x3f);
+}
+
+static __inline unsigned int
+get_SrcB_Y1(tile_bundle_bits n)
+{
+  return (((unsigned int)(n >> 43)) & 0x3f);
+}
+
+static __inline unsigned int
+get_Src_SN(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 0)) & 0x3);
+}
+
+static __inline unsigned int
+get_UnOpcodeExtension_X0(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 12)) & 0x1f);
+}
+
+static __inline unsigned int
+get_UnOpcodeExtension_X1(tile_bundle_bits n)
+{
+  return (((unsigned int)(n >> 43)) & 0x1f);
+}
+
+static __inline unsigned int
+get_UnOpcodeExtension_Y0(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 12)) & 0x1f);
+}
+
+static __inline unsigned int
+get_UnOpcodeExtension_Y1(tile_bundle_bits n)
+{
+  return (((unsigned int)(n >> 43)) & 0x1f);
+}
+
+static __inline unsigned int
+get_UnShOpcodeExtension_X0(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 17)) & 0x3ff);
+}
+
+static __inline unsigned int
+get_UnShOpcodeExtension_X1(tile_bundle_bits n)
+{
+  return (((unsigned int)(n >> 48)) & 0x3ff);
+}
+
+static __inline unsigned int
+get_UnShOpcodeExtension_Y0(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 17)) & 0x7);
+}
+
+static __inline unsigned int
+get_UnShOpcodeExtension_Y1(tile_bundle_bits n)
+{
+  return (((unsigned int)(n >> 48)) & 0x7);
+}
+
+
+static __inline int
+sign_extend(int n, int num_bits)
+{
+  int shift = (int)(sizeof(int) * 8 - num_bits);
+  return (n << shift) >> shift;
+}
+
+
+
+static __inline tile_bundle_bits
+create_BrOff_SN(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x3ff) << 0);
+}
+
+static __inline tile_bundle_bits
+create_BrOff_X1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tile_bundle_bits)(n & 0x00007fff)) << 43) |
+         (((tile_bundle_bits)(n & 0x00018000)) << 20);
+}
+
+static __inline tile_bundle_bits
+create_BrType_X1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tile_bundle_bits)(n & 0xf)) << 31);
+}
+
+static __inline tile_bundle_bits
+create_Dest_Imm8_X1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tile_bundle_bits)(n & 0x0000003f)) << 31) |
+         (((tile_bundle_bits)(n & 0x000000c0)) << 43);
+}
+
+static __inline tile_bundle_bits
+create_Dest_SN(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x3) << 2);
+}
+
+static __inline tile_bundle_bits
+create_Dest_X0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x3f) << 0);
+}
+
+static __inline tile_bundle_bits
+create_Dest_X1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tile_bundle_bits)(n & 0x3f)) << 31);
+}
+
+static __inline tile_bundle_bits
+create_Dest_Y0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x3f) << 0);
+}
+
+static __inline tile_bundle_bits
+create_Dest_Y1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tile_bundle_bits)(n & 0x3f)) << 31);
+}
+
+static __inline tile_bundle_bits
+create_Imm16_X0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0xffff) << 12);
+}
+
+static __inline tile_bundle_bits
+create_Imm16_X1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tile_bundle_bits)(n & 0xffff)) << 43);
+}
+
+static __inline tile_bundle_bits
+create_Imm8_SN(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0xff) << 0);
+}
+
+static __inline tile_bundle_bits
+create_Imm8_X0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0xff) << 12);
+}
+
+static __inline tile_bundle_bits
+create_Imm8_X1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tile_bundle_bits)(n & 0xff)) << 43);
+}
+
+static __inline tile_bundle_bits
+create_Imm8_Y0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0xff) << 12);
+}
+
+static __inline tile_bundle_bits
+create_Imm8_Y1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tile_bundle_bits)(n & 0xff)) << 43);
+}
+
+static __inline tile_bundle_bits
+create_ImmOpcodeExtension_X0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x7f) << 20);
+}
+
+static __inline tile_bundle_bits
+create_ImmOpcodeExtension_X1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tile_bundle_bits)(n & 0x7f)) << 51);
+}
+
+static __inline tile_bundle_bits
+create_ImmRROpcodeExtension_SN(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x3) << 8);
+}
+
+static __inline tile_bundle_bits
+create_JOffLong_X1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tile_bundle_bits)(n & 0x00007fff)) << 43) |
+         (((tile_bundle_bits)(n & 0x00018000)) << 20) |
+         (((tile_bundle_bits)(n & 0x001e0000)) << 14) |
+         (((tile_bundle_bits)(n & 0x07e00000)) << 16) |
+         (((tile_bundle_bits)(n & 0x18000000)) << 31);
+}
+
+static __inline tile_bundle_bits
+create_JOff_X1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tile_bundle_bits)(n & 0x00007fff)) << 43) |
+         (((tile_bundle_bits)(n & 0x00018000)) << 20) |
+         (((tile_bundle_bits)(n & 0x001e0000)) << 14) |
+         (((tile_bundle_bits)(n & 0x07e00000)) << 16) |
+         (((tile_bundle_bits)(n & 0x08000000)) << 31);
+}
+
+static __inline tile_bundle_bits
+create_MF_Imm15_X1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tile_bundle_bits)(n & 0x00003fff)) << 37) |
+         (((tile_bundle_bits)(n & 0x00004000)) << 44);
+}
+
+static __inline tile_bundle_bits
+create_MMEnd_X0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x1f) << 18);
+}
+
+static __inline tile_bundle_bits
+create_MMEnd_X1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tile_bundle_bits)(n & 0x1f)) << 49);
+}
+
+static __inline tile_bundle_bits
+create_MMStart_X0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x1f) << 23);
+}
+
+static __inline tile_bundle_bits
+create_MMStart_X1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tile_bundle_bits)(n & 0x1f)) << 54);
+}
+
+static __inline tile_bundle_bits
+create_MT_Imm15_X1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tile_bundle_bits)(n & 0x0000003f)) << 31) |
+         (((tile_bundle_bits)(n & 0x00003fc0)) << 37) |
+         (((tile_bundle_bits)(n & 0x00004000)) << 44);
+}
+
+static __inline tile_bundle_bits
+create_Mode(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tile_bundle_bits)(n & 0x1)) << 63);
+}
+
+static __inline tile_bundle_bits
+create_NoRegOpcodeExtension_SN(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0xf) << 0);
+}
+
+static __inline tile_bundle_bits
+create_Opcode_SN(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x3f) << 10);
+}
+
+static __inline tile_bundle_bits
+create_Opcode_X0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x7) << 28);
+}
+
+static __inline tile_bundle_bits
+create_Opcode_X1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tile_bundle_bits)(n & 0xf)) << 59);
+}
+
+static __inline tile_bundle_bits
+create_Opcode_Y0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0xf) << 27);
+}
+
+static __inline tile_bundle_bits
+create_Opcode_Y1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tile_bundle_bits)(n & 0xf)) << 59);
+}
+
+static __inline tile_bundle_bits
+create_Opcode_Y2(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tile_bundle_bits)(n & 0x7)) << 56);
+}
+
+static __inline tile_bundle_bits
+create_RROpcodeExtension_SN(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0xf) << 4);
+}
+
+static __inline tile_bundle_bits
+create_RRROpcodeExtension_X0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x1ff) << 18);
+}
+
+static __inline tile_bundle_bits
+create_RRROpcodeExtension_X1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tile_bundle_bits)(n & 0x1ff)) << 49);
+}
+
+static __inline tile_bundle_bits
+create_RRROpcodeExtension_Y0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x3) << 18);
+}
+
+static __inline tile_bundle_bits
+create_RRROpcodeExtension_Y1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tile_bundle_bits)(n & 0x3)) << 49);
+}
+
+static __inline tile_bundle_bits
+create_RouteOpcodeExtension_SN(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x3ff) << 0);
+}
+
+static __inline tile_bundle_bits
+create_S_X0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x1) << 27);
+}
+
+static __inline tile_bundle_bits
+create_S_X1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tile_bundle_bits)(n & 0x1)) << 58);
+}
+
+static __inline tile_bundle_bits
+create_ShAmt_X0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x1f) << 12);
+}
+
+static __inline tile_bundle_bits
+create_ShAmt_X1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tile_bundle_bits)(n & 0x1f)) << 43);
+}
+
+static __inline tile_bundle_bits
+create_ShAmt_Y0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x1f) << 12);
+}
+
+static __inline tile_bundle_bits
+create_ShAmt_Y1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tile_bundle_bits)(n & 0x1f)) << 43);
+}
+
+static __inline tile_bundle_bits
+create_SrcA_X0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x3f) << 6);
+}
+
+static __inline tile_bundle_bits
+create_SrcA_X1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tile_bundle_bits)(n & 0x3f)) << 37);
+}
+
+static __inline tile_bundle_bits
+create_SrcA_Y0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x3f) << 6);
+}
+
+static __inline tile_bundle_bits
+create_SrcA_Y1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tile_bundle_bits)(n & 0x3f)) << 37);
+}
+
+static __inline tile_bundle_bits
+create_SrcA_Y2(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x00000001) << 26) |
+         (((tile_bundle_bits)(n & 0x0000003e)) << 50);
+}
+
+static __inline tile_bundle_bits
+create_SrcBDest_Y2(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x3f) << 20);
+}
+
+static __inline tile_bundle_bits
+create_SrcB_X0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x3f) << 12);
+}
+
+static __inline tile_bundle_bits
+create_SrcB_X1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tile_bundle_bits)(n & 0x3f)) << 43);
+}
+
+static __inline tile_bundle_bits
+create_SrcB_Y0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x3f) << 12);
+}
+
+static __inline tile_bundle_bits
+create_SrcB_Y1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tile_bundle_bits)(n & 0x3f)) << 43);
+}
+
+static __inline tile_bundle_bits
+create_Src_SN(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x3) << 0);
+}
+
+static __inline tile_bundle_bits
+create_UnOpcodeExtension_X0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x1f) << 12);
+}
+
+static __inline tile_bundle_bits
+create_UnOpcodeExtension_X1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tile_bundle_bits)(n & 0x1f)) << 43);
+}
+
+static __inline tile_bundle_bits
+create_UnOpcodeExtension_Y0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x1f) << 12);
+}
+
+static __inline tile_bundle_bits
+create_UnOpcodeExtension_Y1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tile_bundle_bits)(n & 0x1f)) << 43);
+}
+
+static __inline tile_bundle_bits
+create_UnShOpcodeExtension_X0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x3ff) << 17);
+}
+
+static __inline tile_bundle_bits
+create_UnShOpcodeExtension_X1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tile_bundle_bits)(n & 0x3ff)) << 48);
+}
+
+static __inline tile_bundle_bits
+create_UnShOpcodeExtension_Y0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x7) << 17);
+}
+
+static __inline tile_bundle_bits
+create_UnShOpcodeExtension_Y1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tile_bundle_bits)(n & 0x7)) << 48);
+}
+
+
+typedef unsigned short tile_sn_instruction_bits;
+
+
+typedef enum
+{
+  TILE_PIPELINE_X0,
+  TILE_PIPELINE_X1,
+  TILE_PIPELINE_Y0,
+  TILE_PIPELINE_Y1,
+  TILE_PIPELINE_Y2,
+} tile_pipeline;
+
+#define tile_is_x_pipeline(p) ((int)(p) <= (int)TILE_PIPELINE_X1)
+
+typedef enum
+{
+  TILE_OP_TYPE_REGISTER,
+  TILE_OP_TYPE_IMMEDIATE,
+  TILE_OP_TYPE_ADDRESS,
+  TILE_OP_TYPE_SPR
+} tile_operand_type;
+
+/* This is the bit that determines if a bundle is in the Y encoding. */
+#define TILE_BUNDLE_Y_ENCODING_MASK ((tile_bundle_bits)1 << 63)
+
+enum
+{
+  /* Maximum number of instructions in a bundle (2 for X, 3 for Y). */
+  TILE_MAX_INSTRUCTIONS_PER_BUNDLE = 3,
+
+  /* How many different pipeline encodings are there? X0, X1, Y0, Y1, Y2. */
+  TILE_NUM_PIPELINE_ENCODINGS = 5,
+
+  /* Log base 2 of TILE_BUNDLE_SIZE_IN_BYTES. */
+  TILE_LOG2_BUNDLE_SIZE_IN_BYTES = 3,
+
+  /* Instructions take this many bytes. */
+  TILE_BUNDLE_SIZE_IN_BYTES = 1 << TILE_LOG2_BUNDLE_SIZE_IN_BYTES,
+
+  /* Log base 2 of TILE_BUNDLE_ALIGNMENT_IN_BYTES. */
+  TILE_LOG2_BUNDLE_ALIGNMENT_IN_BYTES = 3,
+
+  /* Bundles should be aligned modulo this number of bytes. */
+  TILE_BUNDLE_ALIGNMENT_IN_BYTES =
+    (1 << TILE_LOG2_BUNDLE_ALIGNMENT_IN_BYTES),
+
+  /* Log base 2 of TILE_SN_INSTRUCTION_SIZE_IN_BYTES. */
+  TILE_LOG2_SN_INSTRUCTION_SIZE_IN_BYTES = 1,
+
+  /* Static network instructions take this many bytes. */
+  TILE_SN_INSTRUCTION_SIZE_IN_BYTES =
+    (1 << TILE_LOG2_SN_INSTRUCTION_SIZE_IN_BYTES),
+
+  /* Number of registers (some are magic, such as network I/O). */
+  TILE_NUM_REGISTERS = 64,
+
+  /* Number of static network registers. */
+  TILE_NUM_SN_REGISTERS = 4
+};
+
+
+struct tile_operand
+{
+  /* Is this operand a register, immediate or address? */
+  tile_operand_type type;
+
+  /* The default relocation type for this operand.  */
+  signed int default_reloc : 16;
+
+  /* How many bits is this value? (used for range checking) */
+  unsigned int num_bits : 5;
+
+  /* Is the value signed? (used for range checking) */
+  unsigned int is_signed : 1;
+
+  /* Is this operand a source register? */
+  unsigned int is_src_reg : 1;
+
+  /* Is this operand written? (i.e. is it a destination register) */
+  unsigned int is_dest_reg : 1;
+
+  /* Is this operand PC-relative? */
+  unsigned int is_pc_relative : 1;
+
+  /* By how many bits do we right shift the value before inserting? */
+  unsigned int rightshift : 2;
+
+  /* Return the bits for this operand to be ORed into an existing bundle. */
+  tile_bundle_bits (*insert) (int op);
+
+  /* Extract this operand and return it. */
+  unsigned int (*extract) (tile_bundle_bits bundle);
+};
+
+
+extern const struct tile_operand tile_operands[];
+
+/* One finite-state machine per pipe for rapid instruction decoding. */
+extern const unsigned short * const
+tile_bundle_decoder_fsms[TILE_NUM_PIPELINE_ENCODINGS];
+
+
+struct tile_opcode
+{
+  /* The opcode mnemonic, e.g. "add" */
+  const char *name;
+
+  /* The enum value for this mnemonic. */
+  tile_mnemonic mnemonic;
+
+  /* A bit mask of which of the five pipes this instruction
+     is compatible with:
+     X0  0x01
+     X1  0x02
+     Y0  0x04
+     Y1  0x08
+     Y2  0x10 */
+  unsigned char pipes;
+
+  /* How many operands are there? */
+  unsigned char num_operands;
+
+  /* Which register does this write implicitly, or TREG_ZERO if none? */
+  unsigned char implicitly_written_register;
+
+  /* Can this be bundled with other instructions (almost always true). */
+  unsigned char can_bundle;
+
+  /* The description of the operands. Each of these is an
+   * index into the tile_operands[] table. */
+  unsigned char operands[TILE_NUM_PIPELINE_ENCODINGS][TILE_MAX_OPERANDS];
+
+  /* A mask of which bits have predefined values for each pipeline.
+   * This is useful for disassembly. */
+  tile_bundle_bits fixed_bit_masks[TILE_NUM_PIPELINE_ENCODINGS];
+
+  /* For each bit set in fixed_bit_masks, what the value is for this
+   * instruction. */
+  tile_bundle_bits fixed_bit_values[TILE_NUM_PIPELINE_ENCODINGS];
+};
+
+extern const struct tile_opcode tile_opcodes[];
+
+struct tile_sn_opcode
+{
+  /* The opcode mnemonic, e.g. "add" */
+  const char *name;
+
+  /* The enum value for this mnemonic. */
+  tile_sn_mnemonic mnemonic;
+
+  /* How many operands are there? */
+  unsigned char num_operands;
+
+  /* The description of the operands. Each of these is an
+   * index into the tile_operands[] table. */
+  unsigned char operands[TILE_SN_MAX_OPERANDS];
+
+  /* A mask of which bits have predefined values.
+   * This is useful for disassembly. */
+  tile_sn_instruction_bits fixed_bit_mask;
+
+  /* For each bit set in fixed_bit_masks, what its value is. */
+  tile_sn_instruction_bits fixed_bit_values;
+};
+
+extern const struct tile_sn_opcode tile_sn_opcodes[];
+
+/* Used for non-textual disassembly into structs. */
+struct tile_decoded_instruction
+{
+  const struct tile_opcode *opcode;
+  const struct tile_operand *operands[TILE_MAX_OPERANDS];
+  int operand_values[TILE_MAX_OPERANDS];
+};
+
+
+/* Disassemble a bundle into a struct for machine processing. */
+extern int parse_insn_tile(tile_bundle_bits bits,
+                           unsigned int pc,
+                           struct tile_decoded_instruction
+                           decoded[TILE_MAX_INSTRUCTIONS_PER_BUNDLE]);
+
+
+/* Canonical names of all the registers. */
+/* ISSUE: This table lives in "tile-dis.c" */
+extern const char * const tile_register_names[];
+
+/* Descriptor for a special-purpose register. */
+struct tile_spr
+{
+  /* The number */
+  int number;
+
+  /* The name */
+  const char *name;
+};
+
+/* List of all the SPRs; ordered by increasing number. */
+extern const struct tile_spr tile_sprs[];
+
+/* Number of special-purpose registers. */
+extern const int tile_num_sprs;
+
+extern const char *
+get_tile_spr_name (int num);
+
+#endif /* opcode_tile_h */
diff --git a/arch/tile/include/asm/opcode-tile_64.h b/arch/tile/include/asm/opcode-tile_64.h
new file mode 100644
index 0000000..90f8dd3
--- /dev/null
+++ b/arch/tile/include/asm/opcode-tile_64.h
@@ -0,0 +1,1597 @@
+/* tile.h -- Header file for TILE opcode table
+   Copyright (C) 2005 Free Software Foundation, Inc.
+   Contributed by Tilera Corp. */
+
+#ifndef opcode_tile_h
+#define opcode_tile_h
+
+typedef unsigned long long tile_bundle_bits;
+
+
+enum
+{
+  TILE_MAX_OPERANDS = 5 /* mm */
+};
+
+typedef enum
+{
+  TILE_OPC_BPT,
+  TILE_OPC_INFO,
+  TILE_OPC_INFOL,
+  TILE_OPC_J,
+  TILE_OPC_JAL,
+  TILE_OPC_MOVE,
+  TILE_OPC_MOVE_SN,
+  TILE_OPC_MOVEI,
+  TILE_OPC_MOVEI_SN,
+  TILE_OPC_MOVELI,
+  TILE_OPC_MOVELI_SN,
+  TILE_OPC_MOVELIS,
+  TILE_OPC_PREFETCH,
+  TILE_OPC_ADD,
+  TILE_OPC_ADD_SN,
+  TILE_OPC_ADDB,
+  TILE_OPC_ADDB_SN,
+  TILE_OPC_ADDBS_U,
+  TILE_OPC_ADDBS_U_SN,
+  TILE_OPC_ADDH,
+  TILE_OPC_ADDH_SN,
+  TILE_OPC_ADDHS,
+  TILE_OPC_ADDHS_SN,
+  TILE_OPC_ADDI,
+  TILE_OPC_ADDI_SN,
+  TILE_OPC_ADDIB,
+  TILE_OPC_ADDIB_SN,
+  TILE_OPC_ADDIH,
+  TILE_OPC_ADDIH_SN,
+  TILE_OPC_ADDLI,
+  TILE_OPC_ADDLI_SN,
+  TILE_OPC_ADDLIS,
+  TILE_OPC_ADDS,
+  TILE_OPC_ADDS_SN,
+  TILE_OPC_ADIFFB_U,
+  TILE_OPC_ADIFFB_U_SN,
+  TILE_OPC_ADIFFH,
+  TILE_OPC_ADIFFH_SN,
+  TILE_OPC_AND,
+  TILE_OPC_AND_SN,
+  TILE_OPC_ANDI,
+  TILE_OPC_ANDI_SN,
+  TILE_OPC_AULI,
+  TILE_OPC_AVGB_U,
+  TILE_OPC_AVGB_U_SN,
+  TILE_OPC_AVGH,
+  TILE_OPC_AVGH_SN,
+  TILE_OPC_BBNS,
+  TILE_OPC_BBNS_SN,
+  TILE_OPC_BBNST,
+  TILE_OPC_BBNST_SN,
+  TILE_OPC_BBS,
+  TILE_OPC_BBS_SN,
+  TILE_OPC_BBST,
+  TILE_OPC_BBST_SN,
+  TILE_OPC_BGEZ,
+  TILE_OPC_BGEZ_SN,
+  TILE_OPC_BGEZT,
+  TILE_OPC_BGEZT_SN,
+  TILE_OPC_BGZ,
+  TILE_OPC_BGZ_SN,
+  TILE_OPC_BGZT,
+  TILE_OPC_BGZT_SN,
+  TILE_OPC_BITX,
+  TILE_OPC_BITX_SN,
+  TILE_OPC_BLEZ,
+  TILE_OPC_BLEZ_SN,
+  TILE_OPC_BLEZT,
+  TILE_OPC_BLEZT_SN,
+  TILE_OPC_BLZ,
+  TILE_OPC_BLZ_SN,
+  TILE_OPC_BLZT,
+  TILE_OPC_BLZT_SN,
+  TILE_OPC_BNZ,
+  TILE_OPC_BNZ_SN,
+  TILE_OPC_BNZT,
+  TILE_OPC_BNZT_SN,
+  TILE_OPC_BYTEX,
+  TILE_OPC_BYTEX_SN,
+  TILE_OPC_BZ,
+  TILE_OPC_BZ_SN,
+  TILE_OPC_BZT,
+  TILE_OPC_BZT_SN,
+  TILE_OPC_CLZ,
+  TILE_OPC_CLZ_SN,
+  TILE_OPC_CRC32_32,
+  TILE_OPC_CRC32_32_SN,
+  TILE_OPC_CRC32_8,
+  TILE_OPC_CRC32_8_SN,
+  TILE_OPC_CTZ,
+  TILE_OPC_CTZ_SN,
+  TILE_OPC_DRAIN,
+  TILE_OPC_DTLBPR,
+  TILE_OPC_DWORD_ALIGN,
+  TILE_OPC_DWORD_ALIGN_SN,
+  TILE_OPC_FINV,
+  TILE_OPC_FLUSH,
+  TILE_OPC_FNOP,
+  TILE_OPC_ICOH,
+  TILE_OPC_ILL,
+  TILE_OPC_INTHB,
+  TILE_OPC_INTHB_SN,
+  TILE_OPC_INTHH,
+  TILE_OPC_INTHH_SN,
+  TILE_OPC_INTLB,
+  TILE_OPC_INTLB_SN,
+  TILE_OPC_INTLH,
+  TILE_OPC_INTLH_SN,
+  TILE_OPC_INV,
+  TILE_OPC_IRET,
+  TILE_OPC_JALB,
+  TILE_OPC_JALF,
+  TILE_OPC_JALR,
+  TILE_OPC_JALRP,
+  TILE_OPC_JB,
+  TILE_OPC_JF,
+  TILE_OPC_JR,
+  TILE_OPC_JRP,
+  TILE_OPC_LB,
+  TILE_OPC_LB_SN,
+  TILE_OPC_LB_U,
+  TILE_OPC_LB_U_SN,
+  TILE_OPC_LBADD,
+  TILE_OPC_LBADD_SN,
+  TILE_OPC_LBADD_U,
+  TILE_OPC_LBADD_U_SN,
+  TILE_OPC_LH,
+  TILE_OPC_LH_SN,
+  TILE_OPC_LH_U,
+  TILE_OPC_LH_U_SN,
+  TILE_OPC_LHADD,
+  TILE_OPC_LHADD_SN,
+  TILE_OPC_LHADD_U,
+  TILE_OPC_LHADD_U_SN,
+  TILE_OPC_LNK,
+  TILE_OPC_LNK_SN,
+  TILE_OPC_LW,
+  TILE_OPC_LW_SN,
+  TILE_OPC_LW_NA,
+  TILE_OPC_LW_NA_SN,
+  TILE_OPC_LWADD,
+  TILE_OPC_LWADD_SN,
+  TILE_OPC_LWADD_NA,
+  TILE_OPC_LWADD_NA_SN,
+  TILE_OPC_MAXB_U,
+  TILE_OPC_MAXB_U_SN,
+  TILE_OPC_MAXH,
+  TILE_OPC_MAXH_SN,
+  TILE_OPC_MAXIB_U,
+  TILE_OPC_MAXIB_U_SN,
+  TILE_OPC_MAXIH,
+  TILE_OPC_MAXIH_SN,
+  TILE_OPC_MF,
+  TILE_OPC_MFSPR,
+  TILE_OPC_MINB_U,
+  TILE_OPC_MINB_U_SN,
+  TILE_OPC_MINH,
+  TILE_OPC_MINH_SN,
+  TILE_OPC_MINIB_U,
+  TILE_OPC_MINIB_U_SN,
+  TILE_OPC_MINIH,
+  TILE_OPC_MINIH_SN,
+  TILE_OPC_MM,
+  TILE_OPC_MNZ,
+  TILE_OPC_MNZ_SN,
+  TILE_OPC_MNZB,
+  TILE_OPC_MNZB_SN,
+  TILE_OPC_MNZH,
+  TILE_OPC_MNZH_SN,
+  TILE_OPC_MTSPR,
+  TILE_OPC_MULHH_SS,
+  TILE_OPC_MULHH_SS_SN,
+  TILE_OPC_MULHH_SU,
+  TILE_OPC_MULHH_SU_SN,
+  TILE_OPC_MULHH_UU,
+  TILE_OPC_MULHH_UU_SN,
+  TILE_OPC_MULHHA_SS,
+  TILE_OPC_MULHHA_SS_SN,
+  TILE_OPC_MULHHA_SU,
+  TILE_OPC_MULHHA_SU_SN,
+  TILE_OPC_MULHHA_UU,
+  TILE_OPC_MULHHA_UU_SN,
+  TILE_OPC_MULHHSA_UU,
+  TILE_OPC_MULHHSA_UU_SN,
+  TILE_OPC_MULHL_SS,
+  TILE_OPC_MULHL_SS_SN,
+  TILE_OPC_MULHL_SU,
+  TILE_OPC_MULHL_SU_SN,
+  TILE_OPC_MULHL_US,
+  TILE_OPC_MULHL_US_SN,
+  TILE_OPC_MULHL_UU,
+  TILE_OPC_MULHL_UU_SN,
+  TILE_OPC_MULHLA_SS,
+  TILE_OPC_MULHLA_SS_SN,
+  TILE_OPC_MULHLA_SU,
+  TILE_OPC_MULHLA_SU_SN,
+  TILE_OPC_MULHLA_US,
+  TILE_OPC_MULHLA_US_SN,
+  TILE_OPC_MULHLA_UU,
+  TILE_OPC_MULHLA_UU_SN,
+  TILE_OPC_MULHLSA_UU,
+  TILE_OPC_MULHLSA_UU_SN,
+  TILE_OPC_MULLL_SS,
+  TILE_OPC_MULLL_SS_SN,
+  TILE_OPC_MULLL_SU,
+  TILE_OPC_MULLL_SU_SN,
+  TILE_OPC_MULLL_UU,
+  TILE_OPC_MULLL_UU_SN,
+  TILE_OPC_MULLLA_SS,
+  TILE_OPC_MULLLA_SS_SN,
+  TILE_OPC_MULLLA_SU,
+  TILE_OPC_MULLLA_SU_SN,
+  TILE_OPC_MULLLA_UU,
+  TILE_OPC_MULLLA_UU_SN,
+  TILE_OPC_MULLLSA_UU,
+  TILE_OPC_MULLLSA_UU_SN,
+  TILE_OPC_MVNZ,
+  TILE_OPC_MVNZ_SN,
+  TILE_OPC_MVZ,
+  TILE_OPC_MVZ_SN,
+  TILE_OPC_MZ,
+  TILE_OPC_MZ_SN,
+  TILE_OPC_MZB,
+  TILE_OPC_MZB_SN,
+  TILE_OPC_MZH,
+  TILE_OPC_MZH_SN,
+  TILE_OPC_NAP,
+  TILE_OPC_NOP,
+  TILE_OPC_NOR,
+  TILE_OPC_NOR_SN,
+  TILE_OPC_OR,
+  TILE_OPC_OR_SN,
+  TILE_OPC_ORI,
+  TILE_OPC_ORI_SN,
+  TILE_OPC_PACKBS_U,
+  TILE_OPC_PACKBS_U_SN,
+  TILE_OPC_PACKHB,
+  TILE_OPC_PACKHB_SN,
+  TILE_OPC_PACKHS,
+  TILE_OPC_PACKHS_SN,
+  TILE_OPC_PACKLB,
+  TILE_OPC_PACKLB_SN,
+  TILE_OPC_PCNT,
+  TILE_OPC_PCNT_SN,
+  TILE_OPC_RL,
+  TILE_OPC_RL_SN,
+  TILE_OPC_RLI,
+  TILE_OPC_RLI_SN,
+  TILE_OPC_S1A,
+  TILE_OPC_S1A_SN,
+  TILE_OPC_S2A,
+  TILE_OPC_S2A_SN,
+  TILE_OPC_S3A,
+  TILE_OPC_S3A_SN,
+  TILE_OPC_SADAB_U,
+  TILE_OPC_SADAB_U_SN,
+  TILE_OPC_SADAH,
+  TILE_OPC_SADAH_SN,
+  TILE_OPC_SADAH_U,
+  TILE_OPC_SADAH_U_SN,
+  TILE_OPC_SADB_U,
+  TILE_OPC_SADB_U_SN,
+  TILE_OPC_SADH,
+  TILE_OPC_SADH_SN,
+  TILE_OPC_SADH_U,
+  TILE_OPC_SADH_U_SN,
+  TILE_OPC_SB,
+  TILE_OPC_SBADD,
+  TILE_OPC_SEQ,
+  TILE_OPC_SEQ_SN,
+  TILE_OPC_SEQB,
+  TILE_OPC_SEQB_SN,
+  TILE_OPC_SEQH,
+  TILE_OPC_SEQH_SN,
+  TILE_OPC_SEQI,
+  TILE_OPC_SEQI_SN,
+  TILE_OPC_SEQIB,
+  TILE_OPC_SEQIB_SN,
+  TILE_OPC_SEQIH,
+  TILE_OPC_SEQIH_SN,
+  TILE_OPC_SH,
+  TILE_OPC_SHADD,
+  TILE_OPC_SHL,
+  TILE_OPC_SHL_SN,
+  TILE_OPC_SHLB,
+  TILE_OPC_SHLB_SN,
+  TILE_OPC_SHLH,
+  TILE_OPC_SHLH_SN,
+  TILE_OPC_SHLI,
+  TILE_OPC_SHLI_SN,
+  TILE_OPC_SHLIB,
+  TILE_OPC_SHLIB_SN,
+  TILE_OPC_SHLIH,
+  TILE_OPC_SHLIH_SN,
+  TILE_OPC_SHR,
+  TILE_OPC_SHR_SN,
+  TILE_OPC_SHRB,
+  TILE_OPC_SHRB_SN,
+  TILE_OPC_SHRH,
+  TILE_OPC_SHRH_SN,
+  TILE_OPC_SHRI,
+  TILE_OPC_SHRI_SN,
+  TILE_OPC_SHRIB,
+  TILE_OPC_SHRIB_SN,
+  TILE_OPC_SHRIH,
+  TILE_OPC_SHRIH_SN,
+  TILE_OPC_SLT,
+  TILE_OPC_SLT_SN,
+  TILE_OPC_SLT_U,
+  TILE_OPC_SLT_U_SN,
+  TILE_OPC_SLTB,
+  TILE_OPC_SLTB_SN,
+  TILE_OPC_SLTB_U,
+  TILE_OPC_SLTB_U_SN,
+  TILE_OPC_SLTE,
+  TILE_OPC_SLTE_SN,
+  TILE_OPC_SLTE_U,
+  TILE_OPC_SLTE_U_SN,
+  TILE_OPC_SLTEB,
+  TILE_OPC_SLTEB_SN,
+  TILE_OPC_SLTEB_U,
+  TILE_OPC_SLTEB_U_SN,
+  TILE_OPC_SLTEH,
+  TILE_OPC_SLTEH_SN,
+  TILE_OPC_SLTEH_U,
+  TILE_OPC_SLTEH_U_SN,
+  TILE_OPC_SLTH,
+  TILE_OPC_SLTH_SN,
+  TILE_OPC_SLTH_U,
+  TILE_OPC_SLTH_U_SN,
+  TILE_OPC_SLTI,
+  TILE_OPC_SLTI_SN,
+  TILE_OPC_SLTI_U,
+  TILE_OPC_SLTI_U_SN,
+  TILE_OPC_SLTIB,
+  TILE_OPC_SLTIB_SN,
+  TILE_OPC_SLTIB_U,
+  TILE_OPC_SLTIB_U_SN,
+  TILE_OPC_SLTIH,
+  TILE_OPC_SLTIH_SN,
+  TILE_OPC_SLTIH_U,
+  TILE_OPC_SLTIH_U_SN,
+  TILE_OPC_SNE,
+  TILE_OPC_SNE_SN,
+  TILE_OPC_SNEB,
+  TILE_OPC_SNEB_SN,
+  TILE_OPC_SNEH,
+  TILE_OPC_SNEH_SN,
+  TILE_OPC_SRA,
+  TILE_OPC_SRA_SN,
+  TILE_OPC_SRAB,
+  TILE_OPC_SRAB_SN,
+  TILE_OPC_SRAH,
+  TILE_OPC_SRAH_SN,
+  TILE_OPC_SRAI,
+  TILE_OPC_SRAI_SN,
+  TILE_OPC_SRAIB,
+  TILE_OPC_SRAIB_SN,
+  TILE_OPC_SRAIH,
+  TILE_OPC_SRAIH_SN,
+  TILE_OPC_SUB,
+  TILE_OPC_SUB_SN,
+  TILE_OPC_SUBB,
+  TILE_OPC_SUBB_SN,
+  TILE_OPC_SUBBS_U,
+  TILE_OPC_SUBBS_U_SN,
+  TILE_OPC_SUBH,
+  TILE_OPC_SUBH_SN,
+  TILE_OPC_SUBHS,
+  TILE_OPC_SUBHS_SN,
+  TILE_OPC_SUBS,
+  TILE_OPC_SUBS_SN,
+  TILE_OPC_SW,
+  TILE_OPC_SWADD,
+  TILE_OPC_SWINT0,
+  TILE_OPC_SWINT1,
+  TILE_OPC_SWINT2,
+  TILE_OPC_SWINT3,
+  TILE_OPC_TBLIDXB0,
+  TILE_OPC_TBLIDXB0_SN,
+  TILE_OPC_TBLIDXB1,
+  TILE_OPC_TBLIDXB1_SN,
+  TILE_OPC_TBLIDXB2,
+  TILE_OPC_TBLIDXB2_SN,
+  TILE_OPC_TBLIDXB3,
+  TILE_OPC_TBLIDXB3_SN,
+  TILE_OPC_TNS,
+  TILE_OPC_TNS_SN,
+  TILE_OPC_WH64,
+  TILE_OPC_XOR,
+  TILE_OPC_XOR_SN,
+  TILE_OPC_XORI,
+  TILE_OPC_XORI_SN,
+  TILE_OPC_NONE
+} tile_mnemonic;
+
+/* 64-bit pattern for a { bpt ; nop } bundle. */
+#define TILE_BPT_BUNDLE 0x400b3cae70166000ULL
+
+
+#define TILE_ELF_MACHINE_CODE EM_TILEPRO
+
+#define TILE_ELF_NAME "elf32-tilepro"
+
+enum
+{
+  TILE_SN_MAX_OPERANDS = 6 /* route */
+};
+
+typedef enum
+{
+  TILE_SN_OPC_BZ,
+  TILE_SN_OPC_BNZ,
+  TILE_SN_OPC_JRR,
+  TILE_SN_OPC_FNOP,
+  TILE_SN_OPC_BLZ,
+  TILE_SN_OPC_NOP,
+  TILE_SN_OPC_MOVEI,
+  TILE_SN_OPC_MOVE,
+  TILE_SN_OPC_BGEZ,
+  TILE_SN_OPC_JR,
+  TILE_SN_OPC_BLEZ,
+  TILE_SN_OPC_BBNS,
+  TILE_SN_OPC_JALRR,
+  TILE_SN_OPC_BPT,
+  TILE_SN_OPC_JALR,
+  TILE_SN_OPC_SHR1,
+  TILE_SN_OPC_BGZ,
+  TILE_SN_OPC_BBS,
+  TILE_SN_OPC_SHL8II,
+  TILE_SN_OPC_ADDI,
+  TILE_SN_OPC_HALT,
+  TILE_SN_OPC_ROUTE,
+  TILE_SN_OPC_NONE
+} tile_sn_mnemonic;
+
+extern const unsigned char tile_sn_route_encode[6 * 6 * 6];
+extern const signed char tile_sn_route_decode[256][3];
+extern const char tile_sn_direction_names[6][5];
+extern const signed char tile_sn_dest_map[6][6];
+
+
+static __inline unsigned int
+get_BrOff_SN(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 0)) & 0x3ff);
+}
+
+static __inline unsigned int
+get_BrOff_X1(tile_bundle_bits n)
+{
+  return (((unsigned int)(n >> 43)) & 0x00007fff) |
+         (((unsigned int)(n >> 20)) & 0x00018000);
+}
+
+static __inline unsigned int
+get_BrType_X1(tile_bundle_bits n)
+{
+  return (((unsigned int)(n >> 31)) & 0xf);
+}
+
+static __inline unsigned int
+get_Dest_Imm8_X1(tile_bundle_bits n)
+{
+  return (((unsigned int)(n >> 31)) & 0x0000003f) |
+         (((unsigned int)(n >> 43)) & 0x000000c0);
+}
+
+static __inline unsigned int
+get_Dest_SN(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 2)) & 0x3);
+}
+
+static __inline unsigned int
+get_Dest_X0(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 0)) & 0x3f);
+}
+
+static __inline unsigned int
+get_Dest_X1(tile_bundle_bits n)
+{
+  return (((unsigned int)(n >> 31)) & 0x3f);
+}
+
+static __inline unsigned int
+get_Dest_Y0(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 0)) & 0x3f);
+}
+
+static __inline unsigned int
+get_Dest_Y1(tile_bundle_bits n)
+{
+  return (((unsigned int)(n >> 31)) & 0x3f);
+}
+
+static __inline unsigned int
+get_Imm16_X0(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 12)) & 0xffff);
+}
+
+static __inline unsigned int
+get_Imm16_X1(tile_bundle_bits n)
+{
+  return (((unsigned int)(n >> 43)) & 0xffff);
+}
+
+static __inline unsigned int
+get_Imm8_SN(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 0)) & 0xff);
+}
+
+static __inline unsigned int
+get_Imm8_X0(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 12)) & 0xff);
+}
+
+static __inline unsigned int
+get_Imm8_X1(tile_bundle_bits n)
+{
+  return (((unsigned int)(n >> 43)) & 0xff);
+}
+
+static __inline unsigned int
+get_Imm8_Y0(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 12)) & 0xff);
+}
+
+static __inline unsigned int
+get_Imm8_Y1(tile_bundle_bits n)
+{
+  return (((unsigned int)(n >> 43)) & 0xff);
+}
+
+static __inline unsigned int
+get_ImmOpcodeExtension_X0(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 20)) & 0x7f);
+}
+
+static __inline unsigned int
+get_ImmOpcodeExtension_X1(tile_bundle_bits n)
+{
+  return (((unsigned int)(n >> 51)) & 0x7f);
+}
+
+static __inline unsigned int
+get_ImmRROpcodeExtension_SN(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 8)) & 0x3);
+}
+
+static __inline unsigned int
+get_JOffLong_X1(tile_bundle_bits n)
+{
+  return (((unsigned int)(n >> 43)) & 0x00007fff) |
+         (((unsigned int)(n >> 20)) & 0x00018000) |
+         (((unsigned int)(n >> 14)) & 0x001e0000) |
+         (((unsigned int)(n >> 16)) & 0x07e00000) |
+         (((unsigned int)(n >> 31)) & 0x18000000);
+}
+
+static __inline unsigned int
+get_JOff_X1(tile_bundle_bits n)
+{
+  return (((unsigned int)(n >> 43)) & 0x00007fff) |
+         (((unsigned int)(n >> 20)) & 0x00018000) |
+         (((unsigned int)(n >> 14)) & 0x001e0000) |
+         (((unsigned int)(n >> 16)) & 0x07e00000) |
+         (((unsigned int)(n >> 31)) & 0x08000000);
+}
+
+static __inline unsigned int
+get_MF_Imm15_X1(tile_bundle_bits n)
+{
+  return (((unsigned int)(n >> 37)) & 0x00003fff) |
+         (((unsigned int)(n >> 44)) & 0x00004000);
+}
+
+static __inline unsigned int
+get_MMEnd_X0(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 18)) & 0x1f);
+}
+
+static __inline unsigned int
+get_MMEnd_X1(tile_bundle_bits n)
+{
+  return (((unsigned int)(n >> 49)) & 0x1f);
+}
+
+static __inline unsigned int
+get_MMStart_X0(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 23)) & 0x1f);
+}
+
+static __inline unsigned int
+get_MMStart_X1(tile_bundle_bits n)
+{
+  return (((unsigned int)(n >> 54)) & 0x1f);
+}
+
+static __inline unsigned int
+get_MT_Imm15_X1(tile_bundle_bits n)
+{
+  return (((unsigned int)(n >> 31)) & 0x0000003f) |
+         (((unsigned int)(n >> 37)) & 0x00003fc0) |
+         (((unsigned int)(n >> 44)) & 0x00004000);
+}
+
+static __inline unsigned int
+get_Mode(tile_bundle_bits n)
+{
+  return (((unsigned int)(n >> 63)) & 0x1);
+}
+
+static __inline unsigned int
+get_NoRegOpcodeExtension_SN(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 0)) & 0xf);
+}
+
+static __inline unsigned int
+get_Opcode_SN(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 10)) & 0x3f);
+}
+
+static __inline unsigned int
+get_Opcode_X0(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 28)) & 0x7);
+}
+
+static __inline unsigned int
+get_Opcode_X1(tile_bundle_bits n)
+{
+  return (((unsigned int)(n >> 59)) & 0xf);
+}
+
+static __inline unsigned int
+get_Opcode_Y0(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 27)) & 0xf);
+}
+
+static __inline unsigned int
+get_Opcode_Y1(tile_bundle_bits n)
+{
+  return (((unsigned int)(n >> 59)) & 0xf);
+}
+
+static __inline unsigned int
+get_Opcode_Y2(tile_bundle_bits n)
+{
+  return (((unsigned int)(n >> 56)) & 0x7);
+}
+
+static __inline unsigned int
+get_RROpcodeExtension_SN(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 4)) & 0xf);
+}
+
+static __inline unsigned int
+get_RRROpcodeExtension_X0(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 18)) & 0x1ff);
+}
+
+static __inline unsigned int
+get_RRROpcodeExtension_X1(tile_bundle_bits n)
+{
+  return (((unsigned int)(n >> 49)) & 0x1ff);
+}
+
+static __inline unsigned int
+get_RRROpcodeExtension_Y0(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 18)) & 0x3);
+}
+
+static __inline unsigned int
+get_RRROpcodeExtension_Y1(tile_bundle_bits n)
+{
+  return (((unsigned int)(n >> 49)) & 0x3);
+}
+
+static __inline unsigned int
+get_RouteOpcodeExtension_SN(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 0)) & 0x3ff);
+}
+
+static __inline unsigned int
+get_S_X0(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 27)) & 0x1);
+}
+
+static __inline unsigned int
+get_S_X1(tile_bundle_bits n)
+{
+  return (((unsigned int)(n >> 58)) & 0x1);
+}
+
+static __inline unsigned int
+get_ShAmt_X0(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 12)) & 0x1f);
+}
+
+static __inline unsigned int
+get_ShAmt_X1(tile_bundle_bits n)
+{
+  return (((unsigned int)(n >> 43)) & 0x1f);
+}
+
+static __inline unsigned int
+get_ShAmt_Y0(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 12)) & 0x1f);
+}
+
+static __inline unsigned int
+get_ShAmt_Y1(tile_bundle_bits n)
+{
+  return (((unsigned int)(n >> 43)) & 0x1f);
+}
+
+static __inline unsigned int
+get_SrcA_X0(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 6)) & 0x3f);
+}
+
+static __inline unsigned int
+get_SrcA_X1(tile_bundle_bits n)
+{
+  return (((unsigned int)(n >> 37)) & 0x3f);
+}
+
+static __inline unsigned int
+get_SrcA_Y0(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 6)) & 0x3f);
+}
+
+static __inline unsigned int
+get_SrcA_Y1(tile_bundle_bits n)
+{
+  return (((unsigned int)(n >> 37)) & 0x3f);
+}
+
+static __inline unsigned int
+get_SrcA_Y2(tile_bundle_bits n)
+{
+  return (((n >> 26)) & 0x00000001) |
+         (((unsigned int)(n >> 50)) & 0x0000003e);
+}
+
+static __inline unsigned int
+get_SrcBDest_Y2(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 20)) & 0x3f);
+}
+
+static __inline unsigned int
+get_SrcB_X0(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 12)) & 0x3f);
+}
+
+static __inline unsigned int
+get_SrcB_X1(tile_bundle_bits n)
+{
+  return (((unsigned int)(n >> 43)) & 0x3f);
+}
+
+static __inline unsigned int
+get_SrcB_Y0(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 12)) & 0x3f);
+}
+
+static __inline unsigned int
+get_SrcB_Y1(tile_bundle_bits n)
+{
+  return (((unsigned int)(n >> 43)) & 0x3f);
+}
+
+static __inline unsigned int
+get_Src_SN(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 0)) & 0x3);
+}
+
+static __inline unsigned int
+get_UnOpcodeExtension_X0(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 12)) & 0x1f);
+}
+
+static __inline unsigned int
+get_UnOpcodeExtension_X1(tile_bundle_bits n)
+{
+  return (((unsigned int)(n >> 43)) & 0x1f);
+}
+
+static __inline unsigned int
+get_UnOpcodeExtension_Y0(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 12)) & 0x1f);
+}
+
+static __inline unsigned int
+get_UnOpcodeExtension_Y1(tile_bundle_bits n)
+{
+  return (((unsigned int)(n >> 43)) & 0x1f);
+}
+
+static __inline unsigned int
+get_UnShOpcodeExtension_X0(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 17)) & 0x3ff);
+}
+
+static __inline unsigned int
+get_UnShOpcodeExtension_X1(tile_bundle_bits n)
+{
+  return (((unsigned int)(n >> 48)) & 0x3ff);
+}
+
+static __inline unsigned int
+get_UnShOpcodeExtension_Y0(tile_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 17)) & 0x7);
+}
+
+static __inline unsigned int
+get_UnShOpcodeExtension_Y1(tile_bundle_bits n)
+{
+  return (((unsigned int)(n >> 48)) & 0x7);
+}
+
+
+static __inline int
+sign_extend(int n, int num_bits)
+{
+  int shift = (int)(sizeof(int) * 8 - num_bits);
+  return (n << shift) >> shift;
+}
+
+
+
+static __inline tile_bundle_bits
+create_BrOff_SN(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x3ff) << 0);
+}
+
+static __inline tile_bundle_bits
+create_BrOff_X1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tile_bundle_bits)(n & 0x00007fff)) << 43) |
+         (((tile_bundle_bits)(n & 0x00018000)) << 20);
+}
+
+static __inline tile_bundle_bits
+create_BrType_X1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tile_bundle_bits)(n & 0xf)) << 31);
+}
+
+static __inline tile_bundle_bits
+create_Dest_Imm8_X1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tile_bundle_bits)(n & 0x0000003f)) << 31) |
+         (((tile_bundle_bits)(n & 0x000000c0)) << 43);
+}
+
+static __inline tile_bundle_bits
+create_Dest_SN(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x3) << 2);
+}
+
+static __inline tile_bundle_bits
+create_Dest_X0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x3f) << 0);
+}
+
+static __inline tile_bundle_bits
+create_Dest_X1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tile_bundle_bits)(n & 0x3f)) << 31);
+}
+
+static __inline tile_bundle_bits
+create_Dest_Y0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x3f) << 0);
+}
+
+static __inline tile_bundle_bits
+create_Dest_Y1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tile_bundle_bits)(n & 0x3f)) << 31);
+}
+
+static __inline tile_bundle_bits
+create_Imm16_X0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0xffff) << 12);
+}
+
+static __inline tile_bundle_bits
+create_Imm16_X1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tile_bundle_bits)(n & 0xffff)) << 43);
+}
+
+static __inline tile_bundle_bits
+create_Imm8_SN(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0xff) << 0);
+}
+
+static __inline tile_bundle_bits
+create_Imm8_X0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0xff) << 12);
+}
+
+static __inline tile_bundle_bits
+create_Imm8_X1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tile_bundle_bits)(n & 0xff)) << 43);
+}
+
+static __inline tile_bundle_bits
+create_Imm8_Y0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0xff) << 12);
+}
+
+static __inline tile_bundle_bits
+create_Imm8_Y1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tile_bundle_bits)(n & 0xff)) << 43);
+}
+
+static __inline tile_bundle_bits
+create_ImmOpcodeExtension_X0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x7f) << 20);
+}
+
+static __inline tile_bundle_bits
+create_ImmOpcodeExtension_X1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tile_bundle_bits)(n & 0x7f)) << 51);
+}
+
+static __inline tile_bundle_bits
+create_ImmRROpcodeExtension_SN(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x3) << 8);
+}
+
+static __inline tile_bundle_bits
+create_JOffLong_X1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tile_bundle_bits)(n & 0x00007fff)) << 43) |
+         (((tile_bundle_bits)(n & 0x00018000)) << 20) |
+         (((tile_bundle_bits)(n & 0x001e0000)) << 14) |
+         (((tile_bundle_bits)(n & 0x07e00000)) << 16) |
+         (((tile_bundle_bits)(n & 0x18000000)) << 31);
+}
+
+static __inline tile_bundle_bits
+create_JOff_X1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tile_bundle_bits)(n & 0x00007fff)) << 43) |
+         (((tile_bundle_bits)(n & 0x00018000)) << 20) |
+         (((tile_bundle_bits)(n & 0x001e0000)) << 14) |
+         (((tile_bundle_bits)(n & 0x07e00000)) << 16) |
+         (((tile_bundle_bits)(n & 0x08000000)) << 31);
+}
+
+static __inline tile_bundle_bits
+create_MF_Imm15_X1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tile_bundle_bits)(n & 0x00003fff)) << 37) |
+         (((tile_bundle_bits)(n & 0x00004000)) << 44);
+}
+
+static __inline tile_bundle_bits
+create_MMEnd_X0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x1f) << 18);
+}
+
+static __inline tile_bundle_bits
+create_MMEnd_X1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tile_bundle_bits)(n & 0x1f)) << 49);
+}
+
+static __inline tile_bundle_bits
+create_MMStart_X0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x1f) << 23);
+}
+
+static __inline tile_bundle_bits
+create_MMStart_X1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tile_bundle_bits)(n & 0x1f)) << 54);
+}
+
+static __inline tile_bundle_bits
+create_MT_Imm15_X1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tile_bundle_bits)(n & 0x0000003f)) << 31) |
+         (((tile_bundle_bits)(n & 0x00003fc0)) << 37) |
+         (((tile_bundle_bits)(n & 0x00004000)) << 44);
+}
+
+static __inline tile_bundle_bits
+create_Mode(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tile_bundle_bits)(n & 0x1)) << 63);
+}
+
+static __inline tile_bundle_bits
+create_NoRegOpcodeExtension_SN(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0xf) << 0);
+}
+
+static __inline tile_bundle_bits
+create_Opcode_SN(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x3f) << 10);
+}
+
+static __inline tile_bundle_bits
+create_Opcode_X0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x7) << 28);
+}
+
+static __inline tile_bundle_bits
+create_Opcode_X1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tile_bundle_bits)(n & 0xf)) << 59);
+}
+
+static __inline tile_bundle_bits
+create_Opcode_Y0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0xf) << 27);
+}
+
+static __inline tile_bundle_bits
+create_Opcode_Y1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tile_bundle_bits)(n & 0xf)) << 59);
+}
+
+static __inline tile_bundle_bits
+create_Opcode_Y2(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tile_bundle_bits)(n & 0x7)) << 56);
+}
+
+static __inline tile_bundle_bits
+create_RROpcodeExtension_SN(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0xf) << 4);
+}
+
+static __inline tile_bundle_bits
+create_RRROpcodeExtension_X0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x1ff) << 18);
+}
+
+static __inline tile_bundle_bits
+create_RRROpcodeExtension_X1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tile_bundle_bits)(n & 0x1ff)) << 49);
+}
+
+static __inline tile_bundle_bits
+create_RRROpcodeExtension_Y0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x3) << 18);
+}
+
+static __inline tile_bundle_bits
+create_RRROpcodeExtension_Y1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tile_bundle_bits)(n & 0x3)) << 49);
+}
+
+static __inline tile_bundle_bits
+create_RouteOpcodeExtension_SN(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x3ff) << 0);
+}
+
+static __inline tile_bundle_bits
+create_S_X0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x1) << 27);
+}
+
+static __inline tile_bundle_bits
+create_S_X1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tile_bundle_bits)(n & 0x1)) << 58);
+}
+
+static __inline tile_bundle_bits
+create_ShAmt_X0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x1f) << 12);
+}
+
+static __inline tile_bundle_bits
+create_ShAmt_X1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tile_bundle_bits)(n & 0x1f)) << 43);
+}
+
+static __inline tile_bundle_bits
+create_ShAmt_Y0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x1f) << 12);
+}
+
+static __inline tile_bundle_bits
+create_ShAmt_Y1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tile_bundle_bits)(n & 0x1f)) << 43);
+}
+
+static __inline tile_bundle_bits
+create_SrcA_X0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x3f) << 6);
+}
+
+static __inline tile_bundle_bits
+create_SrcA_X1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tile_bundle_bits)(n & 0x3f)) << 37);
+}
+
+static __inline tile_bundle_bits
+create_SrcA_Y0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x3f) << 6);
+}
+
+static __inline tile_bundle_bits
+create_SrcA_Y1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tile_bundle_bits)(n & 0x3f)) << 37);
+}
+
+static __inline tile_bundle_bits
+create_SrcA_Y2(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x00000001) << 26) |
+         (((tile_bundle_bits)(n & 0x0000003e)) << 50);
+}
+
+static __inline tile_bundle_bits
+create_SrcBDest_Y2(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x3f) << 20);
+}
+
+static __inline tile_bundle_bits
+create_SrcB_X0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x3f) << 12);
+}
+
+static __inline tile_bundle_bits
+create_SrcB_X1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tile_bundle_bits)(n & 0x3f)) << 43);
+}
+
+static __inline tile_bundle_bits
+create_SrcB_Y0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x3f) << 12);
+}
+
+static __inline tile_bundle_bits
+create_SrcB_Y1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tile_bundle_bits)(n & 0x3f)) << 43);
+}
+
+static __inline tile_bundle_bits
+create_Src_SN(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x3) << 0);
+}
+
+static __inline tile_bundle_bits
+create_UnOpcodeExtension_X0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x1f) << 12);
+}
+
+static __inline tile_bundle_bits
+create_UnOpcodeExtension_X1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tile_bundle_bits)(n & 0x1f)) << 43);
+}
+
+static __inline tile_bundle_bits
+create_UnOpcodeExtension_Y0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x1f) << 12);
+}
+
+static __inline tile_bundle_bits
+create_UnOpcodeExtension_Y1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tile_bundle_bits)(n & 0x1f)) << 43);
+}
+
+static __inline tile_bundle_bits
+create_UnShOpcodeExtension_X0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x3ff) << 17);
+}
+
+static __inline tile_bundle_bits
+create_UnShOpcodeExtension_X1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tile_bundle_bits)(n & 0x3ff)) << 48);
+}
+
+static __inline tile_bundle_bits
+create_UnShOpcodeExtension_Y0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x7) << 17);
+}
+
+static __inline tile_bundle_bits
+create_UnShOpcodeExtension_Y1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tile_bundle_bits)(n & 0x7)) << 48);
+}
+
+
+typedef unsigned short tile_sn_instruction_bits;
+
+
+typedef enum
+{
+  TILE_PIPELINE_X0,
+  TILE_PIPELINE_X1,
+  TILE_PIPELINE_Y0,
+  TILE_PIPELINE_Y1,
+  TILE_PIPELINE_Y2,
+} tile_pipeline;
+
+#define tile_is_x_pipeline(p) ((int)(p) <= (int)TILE_PIPELINE_X1)
+
+typedef enum
+{
+  TILE_OP_TYPE_REGISTER,
+  TILE_OP_TYPE_IMMEDIATE,
+  TILE_OP_TYPE_ADDRESS,
+  TILE_OP_TYPE_SPR
+} tile_operand_type;
+
+/* This is the bit that determines if a bundle is in the Y encoding. */
+#define TILE_BUNDLE_Y_ENCODING_MASK ((tile_bundle_bits)1 << 63)
+
+enum
+{
+  /* Maximum number of instructions in a bundle (2 for X, 3 for Y). */
+  TILE_MAX_INSTRUCTIONS_PER_BUNDLE = 3,
+
+  /* How many different pipeline encodings are there? X0, X1, Y0, Y1, Y2. */
+  TILE_NUM_PIPELINE_ENCODINGS = 5,
+
+  /* Log base 2 of TILE_BUNDLE_SIZE_IN_BYTES. */
+  TILE_LOG2_BUNDLE_SIZE_IN_BYTES = 3,
+
+  /* Instructions take this many bytes. */
+  TILE_BUNDLE_SIZE_IN_BYTES = 1 << TILE_LOG2_BUNDLE_SIZE_IN_BYTES,
+
+  /* Log base 2 of TILE_BUNDLE_ALIGNMENT_IN_BYTES. */
+  TILE_LOG2_BUNDLE_ALIGNMENT_IN_BYTES = 3,
+
+  /* Bundles should be aligned modulo this number of bytes. */
+  TILE_BUNDLE_ALIGNMENT_IN_BYTES =
+    (1 << TILE_LOG2_BUNDLE_ALIGNMENT_IN_BYTES),
+
+  /* Log base 2 of TILE_SN_INSTRUCTION_SIZE_IN_BYTES. */
+  TILE_LOG2_SN_INSTRUCTION_SIZE_IN_BYTES = 1,
+
+  /* Static network instructions take this many bytes. */
+  TILE_SN_INSTRUCTION_SIZE_IN_BYTES =
+    (1 << TILE_LOG2_SN_INSTRUCTION_SIZE_IN_BYTES),
+
+  /* Number of registers (some are magic, such as network I/O). */
+  TILE_NUM_REGISTERS = 64,
+
+  /* Number of static network registers. */
+  TILE_NUM_SN_REGISTERS = 4
+};
+
+
+struct tile_operand
+{
+  /* Is this operand a register, immediate or address? */
+  tile_operand_type type;
+
+  /* The default relocation type for this operand.  */
+  signed int default_reloc : 16;
+
+  /* How many bits is this value? (used for range checking) */
+  unsigned int num_bits : 5;
+
+  /* Is the value signed? (used for range checking) */
+  unsigned int is_signed : 1;
+
+  /* Is this operand a source register? */
+  unsigned int is_src_reg : 1;
+
+  /* Is this operand written? (i.e. is it a destination register) */
+  unsigned int is_dest_reg : 1;
+
+  /* Is this operand PC-relative? */
+  unsigned int is_pc_relative : 1;
+
+  /* By how many bits do we right shift the value before inserting? */
+  unsigned int rightshift : 2;
+
+  /* Return the bits for this operand to be ORed into an existing bundle. */
+  tile_bundle_bits (*insert) (int op);
+
+  /* Extract this operand and return it. */
+  unsigned int (*extract) (tile_bundle_bits bundle);
+};
+
+
+extern const struct tile_operand tile_operands[];
+
+/* One finite-state machine per pipe for rapid instruction decoding. */
+extern const unsigned short * const
+tile_bundle_decoder_fsms[TILE_NUM_PIPELINE_ENCODINGS];
+
+
+struct tile_opcode
+{
+  /* The opcode mnemonic, e.g. "add" */
+  const char *name;
+
+  /* The enum value for this mnemonic. */
+  tile_mnemonic mnemonic;
+
+  /* A bit mask of which of the five pipes this instruction
+     is compatible with:
+     X0  0x01
+     X1  0x02
+     Y0  0x04
+     Y1  0x08
+     Y2  0x10 */
+  unsigned char pipes;
+
+  /* How many operands are there? */
+  unsigned char num_operands;
+
+  /* Which register does this write implicitly, or TREG_ZERO if none? */
+  unsigned char implicitly_written_register;
+
+  /* Can this be bundled with other instructions (almost always true). */
+  unsigned char can_bundle;
+
+  /* The description of the operands. Each of these is an
+   * index into the tile_operands[] table. */
+  unsigned char operands[TILE_NUM_PIPELINE_ENCODINGS][TILE_MAX_OPERANDS];
+
+  /* A mask of which bits have predefined values for each pipeline.
+   * This is useful for disassembly. */
+  tile_bundle_bits fixed_bit_masks[TILE_NUM_PIPELINE_ENCODINGS];
+
+  /* For each bit set in fixed_bit_masks, what the value is for this
+   * instruction. */
+  tile_bundle_bits fixed_bit_values[TILE_NUM_PIPELINE_ENCODINGS];
+};
+
+extern const struct tile_opcode tile_opcodes[];
+
+struct tile_sn_opcode
+{
+  /* The opcode mnemonic, e.g. "add" */
+  const char *name;
+
+  /* The enum value for this mnemonic. */
+  tile_sn_mnemonic mnemonic;
+
+  /* How many operands are there? */
+  unsigned char num_operands;
+
+  /* The description of the operands. Each of these is an
+   * index into the tile_operands[] table. */
+  unsigned char operands[TILE_SN_MAX_OPERANDS];
+
+  /* A mask of which bits have predefined values.
+   * This is useful for disassembly. */
+  tile_sn_instruction_bits fixed_bit_mask;
+
+  /* For each bit set in fixed_bit_masks, what its value is. */
+  tile_sn_instruction_bits fixed_bit_values;
+};
+
+extern const struct tile_sn_opcode tile_sn_opcodes[];
+
+/* Used for non-textual disassembly into structs. */
+struct tile_decoded_instruction
+{
+  const struct tile_opcode *opcode;
+  const struct tile_operand *operands[TILE_MAX_OPERANDS];
+  int operand_values[TILE_MAX_OPERANDS];
+};
+
+
+/* Disassemble a bundle into a struct for machine processing. */
+extern int parse_insn_tile(tile_bundle_bits bits,
+                           unsigned int pc,
+                           struct tile_decoded_instruction
+                           decoded[TILE_MAX_INSTRUCTIONS_PER_BUNDLE]);
+
+
+/* Canonical names of all the registers. */
+/* ISSUE: This table lives in "tile-dis.c" */
+extern const char * const tile_register_names[];
+
+/* Descriptor for a special-purpose register. */
+struct tile_spr
+{
+  /* The number */
+  int number;
+
+  /* The name */
+  const char *name;
+};
+
+/* List of all the SPRs; ordered by increasing number. */
+extern const struct tile_spr tile_sprs[];
+
+/* Number of special-purpose registers. */
+extern const int tile_num_sprs;
+
+extern const char *
+get_tile_spr_name (int num);
+
+#endif /* opcode_tile_h */
diff --git a/arch/tile/include/asm/opcode_constants.h b/arch/tile/include/asm/opcode_constants.h
new file mode 100644
index 0000000..37a9f29
--- /dev/null
+++ b/arch/tile/include/asm/opcode_constants.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_OPCODE_CONSTANTS_H
+#define _ASM_TILE_OPCODE_CONSTANTS_H
+
+#include <arch/chip.h>
+
+#if CHIP_WORD_SIZE() == 64
+#include <asm/opcode_constants_64.h>
+#else
+#include <asm/opcode_constants_32.h>
+#endif
+
+#endif /* _ASM_TILE_OPCODE_CONSTANTS_H */
diff --git a/arch/tile/include/asm/opcode_constants_32.h b/arch/tile/include/asm/opcode_constants_32.h
new file mode 100644
index 0000000..227d033
--- /dev/null
+++ b/arch/tile/include/asm/opcode_constants_32.h
@@ -0,0 +1,480 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+/* This file is machine-generated; DO NOT EDIT! */
+
+
+#ifndef _TILE_OPCODE_CONSTANTS_H
+#define _TILE_OPCODE_CONSTANTS_H
+enum
+{
+  ADDBS_U_SPECIAL_0_OPCODE_X0 = 98,
+  ADDBS_U_SPECIAL_0_OPCODE_X1 = 68,
+  ADDB_SPECIAL_0_OPCODE_X0 = 1,
+  ADDB_SPECIAL_0_OPCODE_X1 = 1,
+  ADDHS_SPECIAL_0_OPCODE_X0 = 99,
+  ADDHS_SPECIAL_0_OPCODE_X1 = 69,
+  ADDH_SPECIAL_0_OPCODE_X0 = 2,
+  ADDH_SPECIAL_0_OPCODE_X1 = 2,
+  ADDIB_IMM_0_OPCODE_X0 = 1,
+  ADDIB_IMM_0_OPCODE_X1 = 1,
+  ADDIH_IMM_0_OPCODE_X0 = 2,
+  ADDIH_IMM_0_OPCODE_X1 = 2,
+  ADDI_IMM_0_OPCODE_X0 = 3,
+  ADDI_IMM_0_OPCODE_X1 = 3,
+  ADDI_IMM_1_OPCODE_SN = 1,
+  ADDI_OPCODE_Y0 = 9,
+  ADDI_OPCODE_Y1 = 7,
+  ADDLIS_OPCODE_X0 = 1,
+  ADDLIS_OPCODE_X1 = 2,
+  ADDLI_OPCODE_X0 = 2,
+  ADDLI_OPCODE_X1 = 3,
+  ADDS_SPECIAL_0_OPCODE_X0 = 96,
+  ADDS_SPECIAL_0_OPCODE_X1 = 66,
+  ADD_SPECIAL_0_OPCODE_X0 = 3,
+  ADD_SPECIAL_0_OPCODE_X1 = 3,
+  ADD_SPECIAL_0_OPCODE_Y0 = 0,
+  ADD_SPECIAL_0_OPCODE_Y1 = 0,
+  ADIFFB_U_SPECIAL_0_OPCODE_X0 = 4,
+  ADIFFH_SPECIAL_0_OPCODE_X0 = 5,
+  ANDI_IMM_0_OPCODE_X0 = 1,
+  ANDI_IMM_0_OPCODE_X1 = 4,
+  ANDI_OPCODE_Y0 = 10,
+  ANDI_OPCODE_Y1 = 8,
+  AND_SPECIAL_0_OPCODE_X0 = 6,
+  AND_SPECIAL_0_OPCODE_X1 = 4,
+  AND_SPECIAL_2_OPCODE_Y0 = 0,
+  AND_SPECIAL_2_OPCODE_Y1 = 0,
+  AULI_OPCODE_X0 = 3,
+  AULI_OPCODE_X1 = 4,
+  AVGB_U_SPECIAL_0_OPCODE_X0 = 7,
+  AVGH_SPECIAL_0_OPCODE_X0 = 8,
+  BBNST_BRANCH_OPCODE_X1 = 15,
+  BBNS_BRANCH_OPCODE_X1 = 14,
+  BBNS_OPCODE_SN = 63,
+  BBST_BRANCH_OPCODE_X1 = 13,
+  BBS_BRANCH_OPCODE_X1 = 12,
+  BBS_OPCODE_SN = 62,
+  BGEZT_BRANCH_OPCODE_X1 = 7,
+  BGEZ_BRANCH_OPCODE_X1 = 6,
+  BGEZ_OPCODE_SN = 61,
+  BGZT_BRANCH_OPCODE_X1 = 5,
+  BGZ_BRANCH_OPCODE_X1 = 4,
+  BGZ_OPCODE_SN = 58,
+  BITX_UN_0_SHUN_0_OPCODE_X0 = 1,
+  BITX_UN_0_SHUN_0_OPCODE_Y0 = 1,
+  BLEZT_BRANCH_OPCODE_X1 = 11,
+  BLEZ_BRANCH_OPCODE_X1 = 10,
+  BLEZ_OPCODE_SN = 59,
+  BLZT_BRANCH_OPCODE_X1 = 9,
+  BLZ_BRANCH_OPCODE_X1 = 8,
+  BLZ_OPCODE_SN = 60,
+  BNZT_BRANCH_OPCODE_X1 = 3,
+  BNZ_BRANCH_OPCODE_X1 = 2,
+  BNZ_OPCODE_SN = 57,
+  BPT_NOREG_RR_IMM_0_OPCODE_SN = 1,
+  BRANCH_OPCODE_X1 = 5,
+  BYTEX_UN_0_SHUN_0_OPCODE_X0 = 2,
+  BYTEX_UN_0_SHUN_0_OPCODE_Y0 = 2,
+  BZT_BRANCH_OPCODE_X1 = 1,
+  BZ_BRANCH_OPCODE_X1 = 0,
+  BZ_OPCODE_SN = 56,
+  CLZ_UN_0_SHUN_0_OPCODE_X0 = 3,
+  CLZ_UN_0_SHUN_0_OPCODE_Y0 = 3,
+  CRC32_32_SPECIAL_0_OPCODE_X0 = 9,
+  CRC32_8_SPECIAL_0_OPCODE_X0 = 10,
+  CTZ_UN_0_SHUN_0_OPCODE_X0 = 4,
+  CTZ_UN_0_SHUN_0_OPCODE_Y0 = 4,
+  DRAIN_UN_0_SHUN_0_OPCODE_X1 = 1,
+  DTLBPR_UN_0_SHUN_0_OPCODE_X1 = 2,
+  DWORD_ALIGN_SPECIAL_0_OPCODE_X0 = 95,
+  FINV_UN_0_SHUN_0_OPCODE_X1 = 3,
+  FLUSH_UN_0_SHUN_0_OPCODE_X1 = 4,
+  FNOP_NOREG_RR_IMM_0_OPCODE_SN = 3,
+  FNOP_UN_0_SHUN_0_OPCODE_X0 = 5,
+  FNOP_UN_0_SHUN_0_OPCODE_X1 = 5,
+  FNOP_UN_0_SHUN_0_OPCODE_Y0 = 5,
+  FNOP_UN_0_SHUN_0_OPCODE_Y1 = 1,
+  HALT_NOREG_RR_IMM_0_OPCODE_SN = 0,
+  ICOH_UN_0_SHUN_0_OPCODE_X1 = 6,
+  ILL_UN_0_SHUN_0_OPCODE_X1 = 7,
+  ILL_UN_0_SHUN_0_OPCODE_Y1 = 2,
+  IMM_0_OPCODE_SN = 0,
+  IMM_0_OPCODE_X0 = 4,
+  IMM_0_OPCODE_X1 = 6,
+  IMM_1_OPCODE_SN = 1,
+  IMM_OPCODE_0_X0 = 5,
+  INTHB_SPECIAL_0_OPCODE_X0 = 11,
+  INTHB_SPECIAL_0_OPCODE_X1 = 5,
+  INTHH_SPECIAL_0_OPCODE_X0 = 12,
+  INTHH_SPECIAL_0_OPCODE_X1 = 6,
+  INTLB_SPECIAL_0_OPCODE_X0 = 13,
+  INTLB_SPECIAL_0_OPCODE_X1 = 7,
+  INTLH_SPECIAL_0_OPCODE_X0 = 14,
+  INTLH_SPECIAL_0_OPCODE_X1 = 8,
+  INV_UN_0_SHUN_0_OPCODE_X1 = 8,
+  IRET_UN_0_SHUN_0_OPCODE_X1 = 9,
+  JALB_OPCODE_X1 = 13,
+  JALF_OPCODE_X1 = 12,
+  JALRP_SPECIAL_0_OPCODE_X1 = 9,
+  JALRR_IMM_1_OPCODE_SN = 3,
+  JALR_RR_IMM_0_OPCODE_SN = 5,
+  JALR_SPECIAL_0_OPCODE_X1 = 10,
+  JB_OPCODE_X1 = 11,
+  JF_OPCODE_X1 = 10,
+  JRP_SPECIAL_0_OPCODE_X1 = 11,
+  JRR_IMM_1_OPCODE_SN = 2,
+  JR_RR_IMM_0_OPCODE_SN = 4,
+  JR_SPECIAL_0_OPCODE_X1 = 12,
+  LBADD_IMM_0_OPCODE_X1 = 22,
+  LBADD_U_IMM_0_OPCODE_X1 = 23,
+  LB_OPCODE_Y2 = 0,
+  LB_UN_0_SHUN_0_OPCODE_X1 = 10,
+  LB_U_OPCODE_Y2 = 1,
+  LB_U_UN_0_SHUN_0_OPCODE_X1 = 11,
+  LHADD_IMM_0_OPCODE_X1 = 24,
+  LHADD_U_IMM_0_OPCODE_X1 = 25,
+  LH_OPCODE_Y2 = 2,
+  LH_UN_0_SHUN_0_OPCODE_X1 = 12,
+  LH_U_OPCODE_Y2 = 3,
+  LH_U_UN_0_SHUN_0_OPCODE_X1 = 13,
+  LNK_SPECIAL_0_OPCODE_X1 = 13,
+  LWADD_IMM_0_OPCODE_X1 = 26,
+  LWADD_NA_IMM_0_OPCODE_X1 = 27,
+  LW_NA_UN_0_SHUN_0_OPCODE_X1 = 24,
+  LW_OPCODE_Y2 = 4,
+  LW_UN_0_SHUN_0_OPCODE_X1 = 14,
+  MAXB_U_SPECIAL_0_OPCODE_X0 = 15,
+  MAXB_U_SPECIAL_0_OPCODE_X1 = 14,
+  MAXH_SPECIAL_0_OPCODE_X0 = 16,
+  MAXH_SPECIAL_0_OPCODE_X1 = 15,
+  MAXIB_U_IMM_0_OPCODE_X0 = 4,
+  MAXIB_U_IMM_0_OPCODE_X1 = 5,
+  MAXIH_IMM_0_OPCODE_X0 = 5,
+  MAXIH_IMM_0_OPCODE_X1 = 6,
+  MFSPR_IMM_0_OPCODE_X1 = 7,
+  MF_UN_0_SHUN_0_OPCODE_X1 = 15,
+  MINB_U_SPECIAL_0_OPCODE_X0 = 17,
+  MINB_U_SPECIAL_0_OPCODE_X1 = 16,
+  MINH_SPECIAL_0_OPCODE_X0 = 18,
+  MINH_SPECIAL_0_OPCODE_X1 = 17,
+  MINIB_U_IMM_0_OPCODE_X0 = 6,
+  MINIB_U_IMM_0_OPCODE_X1 = 8,
+  MINIH_IMM_0_OPCODE_X0 = 7,
+  MINIH_IMM_0_OPCODE_X1 = 9,
+  MM_OPCODE_X0 = 6,
+  MM_OPCODE_X1 = 7,
+  MNZB_SPECIAL_0_OPCODE_X0 = 19,
+  MNZB_SPECIAL_0_OPCODE_X1 = 18,
+  MNZH_SPECIAL_0_OPCODE_X0 = 20,
+  MNZH_SPECIAL_0_OPCODE_X1 = 19,
+  MNZ_SPECIAL_0_OPCODE_X0 = 21,
+  MNZ_SPECIAL_0_OPCODE_X1 = 20,
+  MNZ_SPECIAL_1_OPCODE_Y0 = 0,
+  MNZ_SPECIAL_1_OPCODE_Y1 = 1,
+  MOVEI_IMM_1_OPCODE_SN = 0,
+  MOVE_RR_IMM_0_OPCODE_SN = 8,
+  MTSPR_IMM_0_OPCODE_X1 = 10,
+  MULHHA_SS_SPECIAL_0_OPCODE_X0 = 22,
+  MULHHA_SS_SPECIAL_7_OPCODE_Y0 = 0,
+  MULHHA_SU_SPECIAL_0_OPCODE_X0 = 23,
+  MULHHA_UU_SPECIAL_0_OPCODE_X0 = 24,
+  MULHHA_UU_SPECIAL_7_OPCODE_Y0 = 1,
+  MULHHSA_UU_SPECIAL_0_OPCODE_X0 = 25,
+  MULHH_SS_SPECIAL_0_OPCODE_X0 = 26,
+  MULHH_SS_SPECIAL_6_OPCODE_Y0 = 0,
+  MULHH_SU_SPECIAL_0_OPCODE_X0 = 27,
+  MULHH_UU_SPECIAL_0_OPCODE_X0 = 28,
+  MULHH_UU_SPECIAL_6_OPCODE_Y0 = 1,
+  MULHLA_SS_SPECIAL_0_OPCODE_X0 = 29,
+  MULHLA_SU_SPECIAL_0_OPCODE_X0 = 30,
+  MULHLA_US_SPECIAL_0_OPCODE_X0 = 31,
+  MULHLA_UU_SPECIAL_0_OPCODE_X0 = 32,
+  MULHLSA_UU_SPECIAL_0_OPCODE_X0 = 33,
+  MULHLSA_UU_SPECIAL_5_OPCODE_Y0 = 0,
+  MULHL_SS_SPECIAL_0_OPCODE_X0 = 34,
+  MULHL_SU_SPECIAL_0_OPCODE_X0 = 35,
+  MULHL_US_SPECIAL_0_OPCODE_X0 = 36,
+  MULHL_UU_SPECIAL_0_OPCODE_X0 = 37,
+  MULLLA_SS_SPECIAL_0_OPCODE_X0 = 38,
+  MULLLA_SS_SPECIAL_7_OPCODE_Y0 = 2,
+  MULLLA_SU_SPECIAL_0_OPCODE_X0 = 39,
+  MULLLA_UU_SPECIAL_0_OPCODE_X0 = 40,
+  MULLLA_UU_SPECIAL_7_OPCODE_Y0 = 3,
+  MULLLSA_UU_SPECIAL_0_OPCODE_X0 = 41,
+  MULLL_SS_SPECIAL_0_OPCODE_X0 = 42,
+  MULLL_SS_SPECIAL_6_OPCODE_Y0 = 2,
+  MULLL_SU_SPECIAL_0_OPCODE_X0 = 43,
+  MULLL_UU_SPECIAL_0_OPCODE_X0 = 44,
+  MULLL_UU_SPECIAL_6_OPCODE_Y0 = 3,
+  MVNZ_SPECIAL_0_OPCODE_X0 = 45,
+  MVNZ_SPECIAL_1_OPCODE_Y0 = 1,
+  MVZ_SPECIAL_0_OPCODE_X0 = 46,
+  MVZ_SPECIAL_1_OPCODE_Y0 = 2,
+  MZB_SPECIAL_0_OPCODE_X0 = 47,
+  MZB_SPECIAL_0_OPCODE_X1 = 21,
+  MZH_SPECIAL_0_OPCODE_X0 = 48,
+  MZH_SPECIAL_0_OPCODE_X1 = 22,
+  MZ_SPECIAL_0_OPCODE_X0 = 49,
+  MZ_SPECIAL_0_OPCODE_X1 = 23,
+  MZ_SPECIAL_1_OPCODE_Y0 = 3,
+  MZ_SPECIAL_1_OPCODE_Y1 = 2,
+  NAP_UN_0_SHUN_0_OPCODE_X1 = 16,
+  NOP_NOREG_RR_IMM_0_OPCODE_SN = 2,
+  NOP_UN_0_SHUN_0_OPCODE_X0 = 6,
+  NOP_UN_0_SHUN_0_OPCODE_X1 = 17,
+  NOP_UN_0_SHUN_0_OPCODE_Y0 = 6,
+  NOP_UN_0_SHUN_0_OPCODE_Y1 = 3,
+  NOREG_RR_IMM_0_OPCODE_SN = 0,
+  NOR_SPECIAL_0_OPCODE_X0 = 50,
+  NOR_SPECIAL_0_OPCODE_X1 = 24,
+  NOR_SPECIAL_2_OPCODE_Y0 = 1,
+  NOR_SPECIAL_2_OPCODE_Y1 = 1,
+  ORI_IMM_0_OPCODE_X0 = 8,
+  ORI_IMM_0_OPCODE_X1 = 11,
+  ORI_OPCODE_Y0 = 11,
+  ORI_OPCODE_Y1 = 9,
+  OR_SPECIAL_0_OPCODE_X0 = 51,
+  OR_SPECIAL_0_OPCODE_X1 = 25,
+  OR_SPECIAL_2_OPCODE_Y0 = 2,
+  OR_SPECIAL_2_OPCODE_Y1 = 2,
+  PACKBS_U_SPECIAL_0_OPCODE_X0 = 103,
+  PACKBS_U_SPECIAL_0_OPCODE_X1 = 73,
+  PACKHB_SPECIAL_0_OPCODE_X0 = 52,
+  PACKHB_SPECIAL_0_OPCODE_X1 = 26,
+  PACKHS_SPECIAL_0_OPCODE_X0 = 102,
+  PACKHS_SPECIAL_0_OPCODE_X1 = 72,
+  PACKLB_SPECIAL_0_OPCODE_X0 = 53,
+  PACKLB_SPECIAL_0_OPCODE_X1 = 27,
+  PCNT_UN_0_SHUN_0_OPCODE_X0 = 7,
+  PCNT_UN_0_SHUN_0_OPCODE_Y0 = 7,
+  RLI_SHUN_0_OPCODE_X0 = 1,
+  RLI_SHUN_0_OPCODE_X1 = 1,
+  RLI_SHUN_0_OPCODE_Y0 = 1,
+  RLI_SHUN_0_OPCODE_Y1 = 1,
+  RL_SPECIAL_0_OPCODE_X0 = 54,
+  RL_SPECIAL_0_OPCODE_X1 = 28,
+  RL_SPECIAL_3_OPCODE_Y0 = 0,
+  RL_SPECIAL_3_OPCODE_Y1 = 0,
+  RR_IMM_0_OPCODE_SN = 0,
+  S1A_SPECIAL_0_OPCODE_X0 = 55,
+  S1A_SPECIAL_0_OPCODE_X1 = 29,
+  S1A_SPECIAL_0_OPCODE_Y0 = 1,
+  S1A_SPECIAL_0_OPCODE_Y1 = 1,
+  S2A_SPECIAL_0_OPCODE_X0 = 56,
+  S2A_SPECIAL_0_OPCODE_X1 = 30,
+  S2A_SPECIAL_0_OPCODE_Y0 = 2,
+  S2A_SPECIAL_0_OPCODE_Y1 = 2,
+  S3A_SPECIAL_0_OPCODE_X0 = 57,
+  S3A_SPECIAL_0_OPCODE_X1 = 31,
+  S3A_SPECIAL_5_OPCODE_Y0 = 1,
+  S3A_SPECIAL_5_OPCODE_Y1 = 1,
+  SADAB_U_SPECIAL_0_OPCODE_X0 = 58,
+  SADAH_SPECIAL_0_OPCODE_X0 = 59,
+  SADAH_U_SPECIAL_0_OPCODE_X0 = 60,
+  SADB_U_SPECIAL_0_OPCODE_X0 = 61,
+  SADH_SPECIAL_0_OPCODE_X0 = 62,
+  SADH_U_SPECIAL_0_OPCODE_X0 = 63,
+  SBADD_IMM_0_OPCODE_X1 = 28,
+  SB_OPCODE_Y2 = 5,
+  SB_SPECIAL_0_OPCODE_X1 = 32,
+  SEQB_SPECIAL_0_OPCODE_X0 = 64,
+  SEQB_SPECIAL_0_OPCODE_X1 = 33,
+  SEQH_SPECIAL_0_OPCODE_X0 = 65,
+  SEQH_SPECIAL_0_OPCODE_X1 = 34,
+  SEQIB_IMM_0_OPCODE_X0 = 9,
+  SEQIB_IMM_0_OPCODE_X1 = 12,
+  SEQIH_IMM_0_OPCODE_X0 = 10,
+  SEQIH_IMM_0_OPCODE_X1 = 13,
+  SEQI_IMM_0_OPCODE_X0 = 11,
+  SEQI_IMM_0_OPCODE_X1 = 14,
+  SEQI_OPCODE_Y0 = 12,
+  SEQI_OPCODE_Y1 = 10,
+  SEQ_SPECIAL_0_OPCODE_X0 = 66,
+  SEQ_SPECIAL_0_OPCODE_X1 = 35,
+  SEQ_SPECIAL_5_OPCODE_Y0 = 2,
+  SEQ_SPECIAL_5_OPCODE_Y1 = 2,
+  SHADD_IMM_0_OPCODE_X1 = 29,
+  SHL8II_IMM_0_OPCODE_SN = 3,
+  SHLB_SPECIAL_0_OPCODE_X0 = 67,
+  SHLB_SPECIAL_0_OPCODE_X1 = 36,
+  SHLH_SPECIAL_0_OPCODE_X0 = 68,
+  SHLH_SPECIAL_0_OPCODE_X1 = 37,
+  SHLIB_SHUN_0_OPCODE_X0 = 2,
+  SHLIB_SHUN_0_OPCODE_X1 = 2,
+  SHLIH_SHUN_0_OPCODE_X0 = 3,
+  SHLIH_SHUN_0_OPCODE_X1 = 3,
+  SHLI_SHUN_0_OPCODE_X0 = 4,
+  SHLI_SHUN_0_OPCODE_X1 = 4,
+  SHLI_SHUN_0_OPCODE_Y0 = 2,
+  SHLI_SHUN_0_OPCODE_Y1 = 2,
+  SHL_SPECIAL_0_OPCODE_X0 = 69,
+  SHL_SPECIAL_0_OPCODE_X1 = 38,
+  SHL_SPECIAL_3_OPCODE_Y0 = 1,
+  SHL_SPECIAL_3_OPCODE_Y1 = 1,
+  SHR1_RR_IMM_0_OPCODE_SN = 9,
+  SHRB_SPECIAL_0_OPCODE_X0 = 70,
+  SHRB_SPECIAL_0_OPCODE_X1 = 39,
+  SHRH_SPECIAL_0_OPCODE_X0 = 71,
+  SHRH_SPECIAL_0_OPCODE_X1 = 40,
+  SHRIB_SHUN_0_OPCODE_X0 = 5,
+  SHRIB_SHUN_0_OPCODE_X1 = 5,
+  SHRIH_SHUN_0_OPCODE_X0 = 6,
+  SHRIH_SHUN_0_OPCODE_X1 = 6,
+  SHRI_SHUN_0_OPCODE_X0 = 7,
+  SHRI_SHUN_0_OPCODE_X1 = 7,
+  SHRI_SHUN_0_OPCODE_Y0 = 3,
+  SHRI_SHUN_0_OPCODE_Y1 = 3,
+  SHR_SPECIAL_0_OPCODE_X0 = 72,
+  SHR_SPECIAL_0_OPCODE_X1 = 41,
+  SHR_SPECIAL_3_OPCODE_Y0 = 2,
+  SHR_SPECIAL_3_OPCODE_Y1 = 2,
+  SHUN_0_OPCODE_X0 = 7,
+  SHUN_0_OPCODE_X1 = 8,
+  SHUN_0_OPCODE_Y0 = 13,
+  SHUN_0_OPCODE_Y1 = 11,
+  SH_OPCODE_Y2 = 6,
+  SH_SPECIAL_0_OPCODE_X1 = 42,
+  SLTB_SPECIAL_0_OPCODE_X0 = 73,
+  SLTB_SPECIAL_0_OPCODE_X1 = 43,
+  SLTB_U_SPECIAL_0_OPCODE_X0 = 74,
+  SLTB_U_SPECIAL_0_OPCODE_X1 = 44,
+  SLTEB_SPECIAL_0_OPCODE_X0 = 75,
+  SLTEB_SPECIAL_0_OPCODE_X1 = 45,
+  SLTEB_U_SPECIAL_0_OPCODE_X0 = 76,
+  SLTEB_U_SPECIAL_0_OPCODE_X1 = 46,
+  SLTEH_SPECIAL_0_OPCODE_X0 = 77,
+  SLTEH_SPECIAL_0_OPCODE_X1 = 47,
+  SLTEH_U_SPECIAL_0_OPCODE_X0 = 78,
+  SLTEH_U_SPECIAL_0_OPCODE_X1 = 48,
+  SLTE_SPECIAL_0_OPCODE_X0 = 79,
+  SLTE_SPECIAL_0_OPCODE_X1 = 49,
+  SLTE_SPECIAL_4_OPCODE_Y0 = 0,
+  SLTE_SPECIAL_4_OPCODE_Y1 = 0,
+  SLTE_U_SPECIAL_0_OPCODE_X0 = 80,
+  SLTE_U_SPECIAL_0_OPCODE_X1 = 50,
+  SLTE_U_SPECIAL_4_OPCODE_Y0 = 1,
+  SLTE_U_SPECIAL_4_OPCODE_Y1 = 1,
+  SLTH_SPECIAL_0_OPCODE_X0 = 81,
+  SLTH_SPECIAL_0_OPCODE_X1 = 51,
+  SLTH_U_SPECIAL_0_OPCODE_X0 = 82,
+  SLTH_U_SPECIAL_0_OPCODE_X1 = 52,
+  SLTIB_IMM_0_OPCODE_X0 = 12,
+  SLTIB_IMM_0_OPCODE_X1 = 15,
+  SLTIB_U_IMM_0_OPCODE_X0 = 13,
+  SLTIB_U_IMM_0_OPCODE_X1 = 16,
+  SLTIH_IMM_0_OPCODE_X0 = 14,
+  SLTIH_IMM_0_OPCODE_X1 = 17,
+  SLTIH_U_IMM_0_OPCODE_X0 = 15,
+  SLTIH_U_IMM_0_OPCODE_X1 = 18,
+  SLTI_IMM_0_OPCODE_X0 = 16,
+  SLTI_IMM_0_OPCODE_X1 = 19,
+  SLTI_OPCODE_Y0 = 14,
+  SLTI_OPCODE_Y1 = 12,
+  SLTI_U_IMM_0_OPCODE_X0 = 17,
+  SLTI_U_IMM_0_OPCODE_X1 = 20,
+  SLTI_U_OPCODE_Y0 = 15,
+  SLTI_U_OPCODE_Y1 = 13,
+  SLT_SPECIAL_0_OPCODE_X0 = 83,
+  SLT_SPECIAL_0_OPCODE_X1 = 53,
+  SLT_SPECIAL_4_OPCODE_Y0 = 2,
+  SLT_SPECIAL_4_OPCODE_Y1 = 2,
+  SLT_U_SPECIAL_0_OPCODE_X0 = 84,
+  SLT_U_SPECIAL_0_OPCODE_X1 = 54,
+  SLT_U_SPECIAL_4_OPCODE_Y0 = 3,
+  SLT_U_SPECIAL_4_OPCODE_Y1 = 3,
+  SNEB_SPECIAL_0_OPCODE_X0 = 85,
+  SNEB_SPECIAL_0_OPCODE_X1 = 55,
+  SNEH_SPECIAL_0_OPCODE_X0 = 86,
+  SNEH_SPECIAL_0_OPCODE_X1 = 56,
+  SNE_SPECIAL_0_OPCODE_X0 = 87,
+  SNE_SPECIAL_0_OPCODE_X1 = 57,
+  SNE_SPECIAL_5_OPCODE_Y0 = 3,
+  SNE_SPECIAL_5_OPCODE_Y1 = 3,
+  SPECIAL_0_OPCODE_X0 = 0,
+  SPECIAL_0_OPCODE_X1 = 1,
+  SPECIAL_0_OPCODE_Y0 = 1,
+  SPECIAL_0_OPCODE_Y1 = 1,
+  SPECIAL_1_OPCODE_Y0 = 2,
+  SPECIAL_1_OPCODE_Y1 = 2,
+  SPECIAL_2_OPCODE_Y0 = 3,
+  SPECIAL_2_OPCODE_Y1 = 3,
+  SPECIAL_3_OPCODE_Y0 = 4,
+  SPECIAL_3_OPCODE_Y1 = 4,
+  SPECIAL_4_OPCODE_Y0 = 5,
+  SPECIAL_4_OPCODE_Y1 = 5,
+  SPECIAL_5_OPCODE_Y0 = 6,
+  SPECIAL_5_OPCODE_Y1 = 6,
+  SPECIAL_6_OPCODE_Y0 = 7,
+  SPECIAL_7_OPCODE_Y0 = 8,
+  SRAB_SPECIAL_0_OPCODE_X0 = 88,
+  SRAB_SPECIAL_0_OPCODE_X1 = 58,
+  SRAH_SPECIAL_0_OPCODE_X0 = 89,
+  SRAH_SPECIAL_0_OPCODE_X1 = 59,
+  SRAIB_SHUN_0_OPCODE_X0 = 8,
+  SRAIB_SHUN_0_OPCODE_X1 = 8,
+  SRAIH_SHUN_0_OPCODE_X0 = 9,
+  SRAIH_SHUN_0_OPCODE_X1 = 9,
+  SRAI_SHUN_0_OPCODE_X0 = 10,
+  SRAI_SHUN_0_OPCODE_X1 = 10,
+  SRAI_SHUN_0_OPCODE_Y0 = 4,
+  SRAI_SHUN_0_OPCODE_Y1 = 4,
+  SRA_SPECIAL_0_OPCODE_X0 = 90,
+  SRA_SPECIAL_0_OPCODE_X1 = 60,
+  SRA_SPECIAL_3_OPCODE_Y0 = 3,
+  SRA_SPECIAL_3_OPCODE_Y1 = 3,
+  SUBBS_U_SPECIAL_0_OPCODE_X0 = 100,
+  SUBBS_U_SPECIAL_0_OPCODE_X1 = 70,
+  SUBB_SPECIAL_0_OPCODE_X0 = 91,
+  SUBB_SPECIAL_0_OPCODE_X1 = 61,
+  SUBHS_SPECIAL_0_OPCODE_X0 = 101,
+  SUBHS_SPECIAL_0_OPCODE_X1 = 71,
+  SUBH_SPECIAL_0_OPCODE_X0 = 92,
+  SUBH_SPECIAL_0_OPCODE_X1 = 62,
+  SUBS_SPECIAL_0_OPCODE_X0 = 97,
+  SUBS_SPECIAL_0_OPCODE_X1 = 67,
+  SUB_SPECIAL_0_OPCODE_X0 = 93,
+  SUB_SPECIAL_0_OPCODE_X1 = 63,
+  SUB_SPECIAL_0_OPCODE_Y0 = 3,
+  SUB_SPECIAL_0_OPCODE_Y1 = 3,
+  SWADD_IMM_0_OPCODE_X1 = 30,
+  SWINT0_UN_0_SHUN_0_OPCODE_X1 = 18,
+  SWINT1_UN_0_SHUN_0_OPCODE_X1 = 19,
+  SWINT2_UN_0_SHUN_0_OPCODE_X1 = 20,
+  SWINT3_UN_0_SHUN_0_OPCODE_X1 = 21,
+  SW_OPCODE_Y2 = 7,
+  SW_SPECIAL_0_OPCODE_X1 = 64,
+  TBLIDXB0_UN_0_SHUN_0_OPCODE_X0 = 8,
+  TBLIDXB0_UN_0_SHUN_0_OPCODE_Y0 = 8,
+  TBLIDXB1_UN_0_SHUN_0_OPCODE_X0 = 9,
+  TBLIDXB1_UN_0_SHUN_0_OPCODE_Y0 = 9,
+  TBLIDXB2_UN_0_SHUN_0_OPCODE_X0 = 10,
+  TBLIDXB2_UN_0_SHUN_0_OPCODE_Y0 = 10,
+  TBLIDXB3_UN_0_SHUN_0_OPCODE_X0 = 11,
+  TBLIDXB3_UN_0_SHUN_0_OPCODE_Y0 = 11,
+  TNS_UN_0_SHUN_0_OPCODE_X1 = 22,
+  UN_0_SHUN_0_OPCODE_X0 = 11,
+  UN_0_SHUN_0_OPCODE_X1 = 11,
+  UN_0_SHUN_0_OPCODE_Y0 = 5,
+  UN_0_SHUN_0_OPCODE_Y1 = 5,
+  WH64_UN_0_SHUN_0_OPCODE_X1 = 23,
+  XORI_IMM_0_OPCODE_X0 = 2,
+  XORI_IMM_0_OPCODE_X1 = 21,
+  XOR_SPECIAL_0_OPCODE_X0 = 94,
+  XOR_SPECIAL_0_OPCODE_X1 = 65,
+  XOR_SPECIAL_2_OPCODE_Y0 = 3,
+  XOR_SPECIAL_2_OPCODE_Y1 = 3
+};
+
+#endif /* !_TILE_OPCODE_CONSTANTS_H */
diff --git a/arch/tile/include/asm/opcode_constants_64.h b/arch/tile/include/asm/opcode_constants_64.h
new file mode 100644
index 0000000..227d033
--- /dev/null
+++ b/arch/tile/include/asm/opcode_constants_64.h
@@ -0,0 +1,480 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+/* This file is machine-generated; DO NOT EDIT! */
+
+
+#ifndef _TILE_OPCODE_CONSTANTS_H
+#define _TILE_OPCODE_CONSTANTS_H
+enum
+{
+  ADDBS_U_SPECIAL_0_OPCODE_X0 = 98,
+  ADDBS_U_SPECIAL_0_OPCODE_X1 = 68,
+  ADDB_SPECIAL_0_OPCODE_X0 = 1,
+  ADDB_SPECIAL_0_OPCODE_X1 = 1,
+  ADDHS_SPECIAL_0_OPCODE_X0 = 99,
+  ADDHS_SPECIAL_0_OPCODE_X1 = 69,
+  ADDH_SPECIAL_0_OPCODE_X0 = 2,
+  ADDH_SPECIAL_0_OPCODE_X1 = 2,
+  ADDIB_IMM_0_OPCODE_X0 = 1,
+  ADDIB_IMM_0_OPCODE_X1 = 1,
+  ADDIH_IMM_0_OPCODE_X0 = 2,
+  ADDIH_IMM_0_OPCODE_X1 = 2,
+  ADDI_IMM_0_OPCODE_X0 = 3,
+  ADDI_IMM_0_OPCODE_X1 = 3,
+  ADDI_IMM_1_OPCODE_SN = 1,
+  ADDI_OPCODE_Y0 = 9,
+  ADDI_OPCODE_Y1 = 7,
+  ADDLIS_OPCODE_X0 = 1,
+  ADDLIS_OPCODE_X1 = 2,
+  ADDLI_OPCODE_X0 = 2,
+  ADDLI_OPCODE_X1 = 3,
+  ADDS_SPECIAL_0_OPCODE_X0 = 96,
+  ADDS_SPECIAL_0_OPCODE_X1 = 66,
+  ADD_SPECIAL_0_OPCODE_X0 = 3,
+  ADD_SPECIAL_0_OPCODE_X1 = 3,
+  ADD_SPECIAL_0_OPCODE_Y0 = 0,
+  ADD_SPECIAL_0_OPCODE_Y1 = 0,
+  ADIFFB_U_SPECIAL_0_OPCODE_X0 = 4,
+  ADIFFH_SPECIAL_0_OPCODE_X0 = 5,
+  ANDI_IMM_0_OPCODE_X0 = 1,
+  ANDI_IMM_0_OPCODE_X1 = 4,
+  ANDI_OPCODE_Y0 = 10,
+  ANDI_OPCODE_Y1 = 8,
+  AND_SPECIAL_0_OPCODE_X0 = 6,
+  AND_SPECIAL_0_OPCODE_X1 = 4,
+  AND_SPECIAL_2_OPCODE_Y0 = 0,
+  AND_SPECIAL_2_OPCODE_Y1 = 0,
+  AULI_OPCODE_X0 = 3,
+  AULI_OPCODE_X1 = 4,
+  AVGB_U_SPECIAL_0_OPCODE_X0 = 7,
+  AVGH_SPECIAL_0_OPCODE_X0 = 8,
+  BBNST_BRANCH_OPCODE_X1 = 15,
+  BBNS_BRANCH_OPCODE_X1 = 14,
+  BBNS_OPCODE_SN = 63,
+  BBST_BRANCH_OPCODE_X1 = 13,
+  BBS_BRANCH_OPCODE_X1 = 12,
+  BBS_OPCODE_SN = 62,
+  BGEZT_BRANCH_OPCODE_X1 = 7,
+  BGEZ_BRANCH_OPCODE_X1 = 6,
+  BGEZ_OPCODE_SN = 61,
+  BGZT_BRANCH_OPCODE_X1 = 5,
+  BGZ_BRANCH_OPCODE_X1 = 4,
+  BGZ_OPCODE_SN = 58,
+  BITX_UN_0_SHUN_0_OPCODE_X0 = 1,
+  BITX_UN_0_SHUN_0_OPCODE_Y0 = 1,
+  BLEZT_BRANCH_OPCODE_X1 = 11,
+  BLEZ_BRANCH_OPCODE_X1 = 10,
+  BLEZ_OPCODE_SN = 59,
+  BLZT_BRANCH_OPCODE_X1 = 9,
+  BLZ_BRANCH_OPCODE_X1 = 8,
+  BLZ_OPCODE_SN = 60,
+  BNZT_BRANCH_OPCODE_X1 = 3,
+  BNZ_BRANCH_OPCODE_X1 = 2,
+  BNZ_OPCODE_SN = 57,
+  BPT_NOREG_RR_IMM_0_OPCODE_SN = 1,
+  BRANCH_OPCODE_X1 = 5,
+  BYTEX_UN_0_SHUN_0_OPCODE_X0 = 2,
+  BYTEX_UN_0_SHUN_0_OPCODE_Y0 = 2,
+  BZT_BRANCH_OPCODE_X1 = 1,
+  BZ_BRANCH_OPCODE_X1 = 0,
+  BZ_OPCODE_SN = 56,
+  CLZ_UN_0_SHUN_0_OPCODE_X0 = 3,
+  CLZ_UN_0_SHUN_0_OPCODE_Y0 = 3,
+  CRC32_32_SPECIAL_0_OPCODE_X0 = 9,
+  CRC32_8_SPECIAL_0_OPCODE_X0 = 10,
+  CTZ_UN_0_SHUN_0_OPCODE_X0 = 4,
+  CTZ_UN_0_SHUN_0_OPCODE_Y0 = 4,
+  DRAIN_UN_0_SHUN_0_OPCODE_X1 = 1,
+  DTLBPR_UN_0_SHUN_0_OPCODE_X1 = 2,
+  DWORD_ALIGN_SPECIAL_0_OPCODE_X0 = 95,
+  FINV_UN_0_SHUN_0_OPCODE_X1 = 3,
+  FLUSH_UN_0_SHUN_0_OPCODE_X1 = 4,
+  FNOP_NOREG_RR_IMM_0_OPCODE_SN = 3,
+  FNOP_UN_0_SHUN_0_OPCODE_X0 = 5,
+  FNOP_UN_0_SHUN_0_OPCODE_X1 = 5,
+  FNOP_UN_0_SHUN_0_OPCODE_Y0 = 5,
+  FNOP_UN_0_SHUN_0_OPCODE_Y1 = 1,
+  HALT_NOREG_RR_IMM_0_OPCODE_SN = 0,
+  ICOH_UN_0_SHUN_0_OPCODE_X1 = 6,
+  ILL_UN_0_SHUN_0_OPCODE_X1 = 7,
+  ILL_UN_0_SHUN_0_OPCODE_Y1 = 2,
+  IMM_0_OPCODE_SN = 0,
+  IMM_0_OPCODE_X0 = 4,
+  IMM_0_OPCODE_X1 = 6,
+  IMM_1_OPCODE_SN = 1,
+  IMM_OPCODE_0_X0 = 5,
+  INTHB_SPECIAL_0_OPCODE_X0 = 11,
+  INTHB_SPECIAL_0_OPCODE_X1 = 5,
+  INTHH_SPECIAL_0_OPCODE_X0 = 12,
+  INTHH_SPECIAL_0_OPCODE_X1 = 6,
+  INTLB_SPECIAL_0_OPCODE_X0 = 13,
+  INTLB_SPECIAL_0_OPCODE_X1 = 7,
+  INTLH_SPECIAL_0_OPCODE_X0 = 14,
+  INTLH_SPECIAL_0_OPCODE_X1 = 8,
+  INV_UN_0_SHUN_0_OPCODE_X1 = 8,
+  IRET_UN_0_SHUN_0_OPCODE_X1 = 9,
+  JALB_OPCODE_X1 = 13,
+  JALF_OPCODE_X1 = 12,
+  JALRP_SPECIAL_0_OPCODE_X1 = 9,
+  JALRR_IMM_1_OPCODE_SN = 3,
+  JALR_RR_IMM_0_OPCODE_SN = 5,
+  JALR_SPECIAL_0_OPCODE_X1 = 10,
+  JB_OPCODE_X1 = 11,
+  JF_OPCODE_X1 = 10,
+  JRP_SPECIAL_0_OPCODE_X1 = 11,
+  JRR_IMM_1_OPCODE_SN = 2,
+  JR_RR_IMM_0_OPCODE_SN = 4,
+  JR_SPECIAL_0_OPCODE_X1 = 12,
+  LBADD_IMM_0_OPCODE_X1 = 22,
+  LBADD_U_IMM_0_OPCODE_X1 = 23,
+  LB_OPCODE_Y2 = 0,
+  LB_UN_0_SHUN_0_OPCODE_X1 = 10,
+  LB_U_OPCODE_Y2 = 1,
+  LB_U_UN_0_SHUN_0_OPCODE_X1 = 11,
+  LHADD_IMM_0_OPCODE_X1 = 24,
+  LHADD_U_IMM_0_OPCODE_X1 = 25,
+  LH_OPCODE_Y2 = 2,
+  LH_UN_0_SHUN_0_OPCODE_X1 = 12,
+  LH_U_OPCODE_Y2 = 3,
+  LH_U_UN_0_SHUN_0_OPCODE_X1 = 13,
+  LNK_SPECIAL_0_OPCODE_X1 = 13,
+  LWADD_IMM_0_OPCODE_X1 = 26,
+  LWADD_NA_IMM_0_OPCODE_X1 = 27,
+  LW_NA_UN_0_SHUN_0_OPCODE_X1 = 24,
+  LW_OPCODE_Y2 = 4,
+  LW_UN_0_SHUN_0_OPCODE_X1 = 14,
+  MAXB_U_SPECIAL_0_OPCODE_X0 = 15,
+  MAXB_U_SPECIAL_0_OPCODE_X1 = 14,
+  MAXH_SPECIAL_0_OPCODE_X0 = 16,
+  MAXH_SPECIAL_0_OPCODE_X1 = 15,
+  MAXIB_U_IMM_0_OPCODE_X0 = 4,
+  MAXIB_U_IMM_0_OPCODE_X1 = 5,
+  MAXIH_IMM_0_OPCODE_X0 = 5,
+  MAXIH_IMM_0_OPCODE_X1 = 6,
+  MFSPR_IMM_0_OPCODE_X1 = 7,
+  MF_UN_0_SHUN_0_OPCODE_X1 = 15,
+  MINB_U_SPECIAL_0_OPCODE_X0 = 17,
+  MINB_U_SPECIAL_0_OPCODE_X1 = 16,
+  MINH_SPECIAL_0_OPCODE_X0 = 18,
+  MINH_SPECIAL_0_OPCODE_X1 = 17,
+  MINIB_U_IMM_0_OPCODE_X0 = 6,
+  MINIB_U_IMM_0_OPCODE_X1 = 8,
+  MINIH_IMM_0_OPCODE_X0 = 7,
+  MINIH_IMM_0_OPCODE_X1 = 9,
+  MM_OPCODE_X0 = 6,
+  MM_OPCODE_X1 = 7,
+  MNZB_SPECIAL_0_OPCODE_X0 = 19,
+  MNZB_SPECIAL_0_OPCODE_X1 = 18,
+  MNZH_SPECIAL_0_OPCODE_X0 = 20,
+  MNZH_SPECIAL_0_OPCODE_X1 = 19,
+  MNZ_SPECIAL_0_OPCODE_X0 = 21,
+  MNZ_SPECIAL_0_OPCODE_X1 = 20,
+  MNZ_SPECIAL_1_OPCODE_Y0 = 0,
+  MNZ_SPECIAL_1_OPCODE_Y1 = 1,
+  MOVEI_IMM_1_OPCODE_SN = 0,
+  MOVE_RR_IMM_0_OPCODE_SN = 8,
+  MTSPR_IMM_0_OPCODE_X1 = 10,
+  MULHHA_SS_SPECIAL_0_OPCODE_X0 = 22,
+  MULHHA_SS_SPECIAL_7_OPCODE_Y0 = 0,
+  MULHHA_SU_SPECIAL_0_OPCODE_X0 = 23,
+  MULHHA_UU_SPECIAL_0_OPCODE_X0 = 24,
+  MULHHA_UU_SPECIAL_7_OPCODE_Y0 = 1,
+  MULHHSA_UU_SPECIAL_0_OPCODE_X0 = 25,
+  MULHH_SS_SPECIAL_0_OPCODE_X0 = 26,
+  MULHH_SS_SPECIAL_6_OPCODE_Y0 = 0,
+  MULHH_SU_SPECIAL_0_OPCODE_X0 = 27,
+  MULHH_UU_SPECIAL_0_OPCODE_X0 = 28,
+  MULHH_UU_SPECIAL_6_OPCODE_Y0 = 1,
+  MULHLA_SS_SPECIAL_0_OPCODE_X0 = 29,
+  MULHLA_SU_SPECIAL_0_OPCODE_X0 = 30,
+  MULHLA_US_SPECIAL_0_OPCODE_X0 = 31,
+  MULHLA_UU_SPECIAL_0_OPCODE_X0 = 32,
+  MULHLSA_UU_SPECIAL_0_OPCODE_X0 = 33,
+  MULHLSA_UU_SPECIAL_5_OPCODE_Y0 = 0,
+  MULHL_SS_SPECIAL_0_OPCODE_X0 = 34,
+  MULHL_SU_SPECIAL_0_OPCODE_X0 = 35,
+  MULHL_US_SPECIAL_0_OPCODE_X0 = 36,
+  MULHL_UU_SPECIAL_0_OPCODE_X0 = 37,
+  MULLLA_SS_SPECIAL_0_OPCODE_X0 = 38,
+  MULLLA_SS_SPECIAL_7_OPCODE_Y0 = 2,
+  MULLLA_SU_SPECIAL_0_OPCODE_X0 = 39,
+  MULLLA_UU_SPECIAL_0_OPCODE_X0 = 40,
+  MULLLA_UU_SPECIAL_7_OPCODE_Y0 = 3,
+  MULLLSA_UU_SPECIAL_0_OPCODE_X0 = 41,
+  MULLL_SS_SPECIAL_0_OPCODE_X0 = 42,
+  MULLL_SS_SPECIAL_6_OPCODE_Y0 = 2,
+  MULLL_SU_SPECIAL_0_OPCODE_X0 = 43,
+  MULLL_UU_SPECIAL_0_OPCODE_X0 = 44,
+  MULLL_UU_SPECIAL_6_OPCODE_Y0 = 3,
+  MVNZ_SPECIAL_0_OPCODE_X0 = 45,
+  MVNZ_SPECIAL_1_OPCODE_Y0 = 1,
+  MVZ_SPECIAL_0_OPCODE_X0 = 46,
+  MVZ_SPECIAL_1_OPCODE_Y0 = 2,
+  MZB_SPECIAL_0_OPCODE_X0 = 47,
+  MZB_SPECIAL_0_OPCODE_X1 = 21,
+  MZH_SPECIAL_0_OPCODE_X0 = 48,
+  MZH_SPECIAL_0_OPCODE_X1 = 22,
+  MZ_SPECIAL_0_OPCODE_X0 = 49,
+  MZ_SPECIAL_0_OPCODE_X1 = 23,
+  MZ_SPECIAL_1_OPCODE_Y0 = 3,
+  MZ_SPECIAL_1_OPCODE_Y1 = 2,
+  NAP_UN_0_SHUN_0_OPCODE_X1 = 16,
+  NOP_NOREG_RR_IMM_0_OPCODE_SN = 2,
+  NOP_UN_0_SHUN_0_OPCODE_X0 = 6,
+  NOP_UN_0_SHUN_0_OPCODE_X1 = 17,
+  NOP_UN_0_SHUN_0_OPCODE_Y0 = 6,
+  NOP_UN_0_SHUN_0_OPCODE_Y1 = 3,
+  NOREG_RR_IMM_0_OPCODE_SN = 0,
+  NOR_SPECIAL_0_OPCODE_X0 = 50,
+  NOR_SPECIAL_0_OPCODE_X1 = 24,
+  NOR_SPECIAL_2_OPCODE_Y0 = 1,
+  NOR_SPECIAL_2_OPCODE_Y1 = 1,
+  ORI_IMM_0_OPCODE_X0 = 8,
+  ORI_IMM_0_OPCODE_X1 = 11,
+  ORI_OPCODE_Y0 = 11,
+  ORI_OPCODE_Y1 = 9,
+  OR_SPECIAL_0_OPCODE_X0 = 51,
+  OR_SPECIAL_0_OPCODE_X1 = 25,
+  OR_SPECIAL_2_OPCODE_Y0 = 2,
+  OR_SPECIAL_2_OPCODE_Y1 = 2,
+  PACKBS_U_SPECIAL_0_OPCODE_X0 = 103,
+  PACKBS_U_SPECIAL_0_OPCODE_X1 = 73,
+  PACKHB_SPECIAL_0_OPCODE_X0 = 52,
+  PACKHB_SPECIAL_0_OPCODE_X1 = 26,
+  PACKHS_SPECIAL_0_OPCODE_X0 = 102,
+  PACKHS_SPECIAL_0_OPCODE_X1 = 72,
+  PACKLB_SPECIAL_0_OPCODE_X0 = 53,
+  PACKLB_SPECIAL_0_OPCODE_X1 = 27,
+  PCNT_UN_0_SHUN_0_OPCODE_X0 = 7,
+  PCNT_UN_0_SHUN_0_OPCODE_Y0 = 7,
+  RLI_SHUN_0_OPCODE_X0 = 1,
+  RLI_SHUN_0_OPCODE_X1 = 1,
+  RLI_SHUN_0_OPCODE_Y0 = 1,
+  RLI_SHUN_0_OPCODE_Y1 = 1,
+  RL_SPECIAL_0_OPCODE_X0 = 54,
+  RL_SPECIAL_0_OPCODE_X1 = 28,
+  RL_SPECIAL_3_OPCODE_Y0 = 0,
+  RL_SPECIAL_3_OPCODE_Y1 = 0,
+  RR_IMM_0_OPCODE_SN = 0,
+  S1A_SPECIAL_0_OPCODE_X0 = 55,
+  S1A_SPECIAL_0_OPCODE_X1 = 29,
+  S1A_SPECIAL_0_OPCODE_Y0 = 1,
+  S1A_SPECIAL_0_OPCODE_Y1 = 1,
+  S2A_SPECIAL_0_OPCODE_X0 = 56,
+  S2A_SPECIAL_0_OPCODE_X1 = 30,
+  S2A_SPECIAL_0_OPCODE_Y0 = 2,
+  S2A_SPECIAL_0_OPCODE_Y1 = 2,
+  S3A_SPECIAL_0_OPCODE_X0 = 57,
+  S3A_SPECIAL_0_OPCODE_X1 = 31,
+  S3A_SPECIAL_5_OPCODE_Y0 = 1,
+  S3A_SPECIAL_5_OPCODE_Y1 = 1,
+  SADAB_U_SPECIAL_0_OPCODE_X0 = 58,
+  SADAH_SPECIAL_0_OPCODE_X0 = 59,
+  SADAH_U_SPECIAL_0_OPCODE_X0 = 60,
+  SADB_U_SPECIAL_0_OPCODE_X0 = 61,
+  SADH_SPECIAL_0_OPCODE_X0 = 62,
+  SADH_U_SPECIAL_0_OPCODE_X0 = 63,
+  SBADD_IMM_0_OPCODE_X1 = 28,
+  SB_OPCODE_Y2 = 5,
+  SB_SPECIAL_0_OPCODE_X1 = 32,
+  SEQB_SPECIAL_0_OPCODE_X0 = 64,
+  SEQB_SPECIAL_0_OPCODE_X1 = 33,
+  SEQH_SPECIAL_0_OPCODE_X0 = 65,
+  SEQH_SPECIAL_0_OPCODE_X1 = 34,
+  SEQIB_IMM_0_OPCODE_X0 = 9,
+  SEQIB_IMM_0_OPCODE_X1 = 12,
+  SEQIH_IMM_0_OPCODE_X0 = 10,
+  SEQIH_IMM_0_OPCODE_X1 = 13,
+  SEQI_IMM_0_OPCODE_X0 = 11,
+  SEQI_IMM_0_OPCODE_X1 = 14,
+  SEQI_OPCODE_Y0 = 12,
+  SEQI_OPCODE_Y1 = 10,
+  SEQ_SPECIAL_0_OPCODE_X0 = 66,
+  SEQ_SPECIAL_0_OPCODE_X1 = 35,
+  SEQ_SPECIAL_5_OPCODE_Y0 = 2,
+  SEQ_SPECIAL_5_OPCODE_Y1 = 2,
+  SHADD_IMM_0_OPCODE_X1 = 29,
+  SHL8II_IMM_0_OPCODE_SN = 3,
+  SHLB_SPECIAL_0_OPCODE_X0 = 67,
+  SHLB_SPECIAL_0_OPCODE_X1 = 36,
+  SHLH_SPECIAL_0_OPCODE_X0 = 68,
+  SHLH_SPECIAL_0_OPCODE_X1 = 37,
+  SHLIB_SHUN_0_OPCODE_X0 = 2,
+  SHLIB_SHUN_0_OPCODE_X1 = 2,
+  SHLIH_SHUN_0_OPCODE_X0 = 3,
+  SHLIH_SHUN_0_OPCODE_X1 = 3,
+  SHLI_SHUN_0_OPCODE_X0 = 4,
+  SHLI_SHUN_0_OPCODE_X1 = 4,
+  SHLI_SHUN_0_OPCODE_Y0 = 2,
+  SHLI_SHUN_0_OPCODE_Y1 = 2,
+  SHL_SPECIAL_0_OPCODE_X0 = 69,
+  SHL_SPECIAL_0_OPCODE_X1 = 38,
+  SHL_SPECIAL_3_OPCODE_Y0 = 1,
+  SHL_SPECIAL_3_OPCODE_Y1 = 1,
+  SHR1_RR_IMM_0_OPCODE_SN = 9,
+  SHRB_SPECIAL_0_OPCODE_X0 = 70,
+  SHRB_SPECIAL_0_OPCODE_X1 = 39,
+  SHRH_SPECIAL_0_OPCODE_X0 = 71,
+  SHRH_SPECIAL_0_OPCODE_X1 = 40,
+  SHRIB_SHUN_0_OPCODE_X0 = 5,
+  SHRIB_SHUN_0_OPCODE_X1 = 5,
+  SHRIH_SHUN_0_OPCODE_X0 = 6,
+  SHRIH_SHUN_0_OPCODE_X1 = 6,
+  SHRI_SHUN_0_OPCODE_X0 = 7,
+  SHRI_SHUN_0_OPCODE_X1 = 7,
+  SHRI_SHUN_0_OPCODE_Y0 = 3,
+  SHRI_SHUN_0_OPCODE_Y1 = 3,
+  SHR_SPECIAL_0_OPCODE_X0 = 72,
+  SHR_SPECIAL_0_OPCODE_X1 = 41,
+  SHR_SPECIAL_3_OPCODE_Y0 = 2,
+  SHR_SPECIAL_3_OPCODE_Y1 = 2,
+  SHUN_0_OPCODE_X0 = 7,
+  SHUN_0_OPCODE_X1 = 8,
+  SHUN_0_OPCODE_Y0 = 13,
+  SHUN_0_OPCODE_Y1 = 11,
+  SH_OPCODE_Y2 = 6,
+  SH_SPECIAL_0_OPCODE_X1 = 42,
+  SLTB_SPECIAL_0_OPCODE_X0 = 73,
+  SLTB_SPECIAL_0_OPCODE_X1 = 43,
+  SLTB_U_SPECIAL_0_OPCODE_X0 = 74,
+  SLTB_U_SPECIAL_0_OPCODE_X1 = 44,
+  SLTEB_SPECIAL_0_OPCODE_X0 = 75,
+  SLTEB_SPECIAL_0_OPCODE_X1 = 45,
+  SLTEB_U_SPECIAL_0_OPCODE_X0 = 76,
+  SLTEB_U_SPECIAL_0_OPCODE_X1 = 46,
+  SLTEH_SPECIAL_0_OPCODE_X0 = 77,
+  SLTEH_SPECIAL_0_OPCODE_X1 = 47,
+  SLTEH_U_SPECIAL_0_OPCODE_X0 = 78,
+  SLTEH_U_SPECIAL_0_OPCODE_X1 = 48,
+  SLTE_SPECIAL_0_OPCODE_X0 = 79,
+  SLTE_SPECIAL_0_OPCODE_X1 = 49,
+  SLTE_SPECIAL_4_OPCODE_Y0 = 0,
+  SLTE_SPECIAL_4_OPCODE_Y1 = 0,
+  SLTE_U_SPECIAL_0_OPCODE_X0 = 80,
+  SLTE_U_SPECIAL_0_OPCODE_X1 = 50,
+  SLTE_U_SPECIAL_4_OPCODE_Y0 = 1,
+  SLTE_U_SPECIAL_4_OPCODE_Y1 = 1,
+  SLTH_SPECIAL_0_OPCODE_X0 = 81,
+  SLTH_SPECIAL_0_OPCODE_X1 = 51,
+  SLTH_U_SPECIAL_0_OPCODE_X0 = 82,
+  SLTH_U_SPECIAL_0_OPCODE_X1 = 52,
+  SLTIB_IMM_0_OPCODE_X0 = 12,
+  SLTIB_IMM_0_OPCODE_X1 = 15,
+  SLTIB_U_IMM_0_OPCODE_X0 = 13,
+  SLTIB_U_IMM_0_OPCODE_X1 = 16,
+  SLTIH_IMM_0_OPCODE_X0 = 14,
+  SLTIH_IMM_0_OPCODE_X1 = 17,
+  SLTIH_U_IMM_0_OPCODE_X0 = 15,
+  SLTIH_U_IMM_0_OPCODE_X1 = 18,
+  SLTI_IMM_0_OPCODE_X0 = 16,
+  SLTI_IMM_0_OPCODE_X1 = 19,
+  SLTI_OPCODE_Y0 = 14,
+  SLTI_OPCODE_Y1 = 12,
+  SLTI_U_IMM_0_OPCODE_X0 = 17,
+  SLTI_U_IMM_0_OPCODE_X1 = 20,
+  SLTI_U_OPCODE_Y0 = 15,
+  SLTI_U_OPCODE_Y1 = 13,
+  SLT_SPECIAL_0_OPCODE_X0 = 83,
+  SLT_SPECIAL_0_OPCODE_X1 = 53,
+  SLT_SPECIAL_4_OPCODE_Y0 = 2,
+  SLT_SPECIAL_4_OPCODE_Y1 = 2,
+  SLT_U_SPECIAL_0_OPCODE_X0 = 84,
+  SLT_U_SPECIAL_0_OPCODE_X1 = 54,
+  SLT_U_SPECIAL_4_OPCODE_Y0 = 3,
+  SLT_U_SPECIAL_4_OPCODE_Y1 = 3,
+  SNEB_SPECIAL_0_OPCODE_X0 = 85,
+  SNEB_SPECIAL_0_OPCODE_X1 = 55,
+  SNEH_SPECIAL_0_OPCODE_X0 = 86,
+  SNEH_SPECIAL_0_OPCODE_X1 = 56,
+  SNE_SPECIAL_0_OPCODE_X0 = 87,
+  SNE_SPECIAL_0_OPCODE_X1 = 57,
+  SNE_SPECIAL_5_OPCODE_Y0 = 3,
+  SNE_SPECIAL_5_OPCODE_Y1 = 3,
+  SPECIAL_0_OPCODE_X0 = 0,
+  SPECIAL_0_OPCODE_X1 = 1,
+  SPECIAL_0_OPCODE_Y0 = 1,
+  SPECIAL_0_OPCODE_Y1 = 1,
+  SPECIAL_1_OPCODE_Y0 = 2,
+  SPECIAL_1_OPCODE_Y1 = 2,
+  SPECIAL_2_OPCODE_Y0 = 3,
+  SPECIAL_2_OPCODE_Y1 = 3,
+  SPECIAL_3_OPCODE_Y0 = 4,
+  SPECIAL_3_OPCODE_Y1 = 4,
+  SPECIAL_4_OPCODE_Y0 = 5,
+  SPECIAL_4_OPCODE_Y1 = 5,
+  SPECIAL_5_OPCODE_Y0 = 6,
+  SPECIAL_5_OPCODE_Y1 = 6,
+  SPECIAL_6_OPCODE_Y0 = 7,
+  SPECIAL_7_OPCODE_Y0 = 8,
+  SRAB_SPECIAL_0_OPCODE_X0 = 88,
+  SRAB_SPECIAL_0_OPCODE_X1 = 58,
+  SRAH_SPECIAL_0_OPCODE_X0 = 89,
+  SRAH_SPECIAL_0_OPCODE_X1 = 59,
+  SRAIB_SHUN_0_OPCODE_X0 = 8,
+  SRAIB_SHUN_0_OPCODE_X1 = 8,
+  SRAIH_SHUN_0_OPCODE_X0 = 9,
+  SRAIH_SHUN_0_OPCODE_X1 = 9,
+  SRAI_SHUN_0_OPCODE_X0 = 10,
+  SRAI_SHUN_0_OPCODE_X1 = 10,
+  SRAI_SHUN_0_OPCODE_Y0 = 4,
+  SRAI_SHUN_0_OPCODE_Y1 = 4,
+  SRA_SPECIAL_0_OPCODE_X0 = 90,
+  SRA_SPECIAL_0_OPCODE_X1 = 60,
+  SRA_SPECIAL_3_OPCODE_Y0 = 3,
+  SRA_SPECIAL_3_OPCODE_Y1 = 3,
+  SUBBS_U_SPECIAL_0_OPCODE_X0 = 100,
+  SUBBS_U_SPECIAL_0_OPCODE_X1 = 70,
+  SUBB_SPECIAL_0_OPCODE_X0 = 91,
+  SUBB_SPECIAL_0_OPCODE_X1 = 61,
+  SUBHS_SPECIAL_0_OPCODE_X0 = 101,
+  SUBHS_SPECIAL_0_OPCODE_X1 = 71,
+  SUBH_SPECIAL_0_OPCODE_X0 = 92,
+  SUBH_SPECIAL_0_OPCODE_X1 = 62,
+  SUBS_SPECIAL_0_OPCODE_X0 = 97,
+  SUBS_SPECIAL_0_OPCODE_X1 = 67,
+  SUB_SPECIAL_0_OPCODE_X0 = 93,
+  SUB_SPECIAL_0_OPCODE_X1 = 63,
+  SUB_SPECIAL_0_OPCODE_Y0 = 3,
+  SUB_SPECIAL_0_OPCODE_Y1 = 3,
+  SWADD_IMM_0_OPCODE_X1 = 30,
+  SWINT0_UN_0_SHUN_0_OPCODE_X1 = 18,
+  SWINT1_UN_0_SHUN_0_OPCODE_X1 = 19,
+  SWINT2_UN_0_SHUN_0_OPCODE_X1 = 20,
+  SWINT3_UN_0_SHUN_0_OPCODE_X1 = 21,
+  SW_OPCODE_Y2 = 7,
+  SW_SPECIAL_0_OPCODE_X1 = 64,
+  TBLIDXB0_UN_0_SHUN_0_OPCODE_X0 = 8,
+  TBLIDXB0_UN_0_SHUN_0_OPCODE_Y0 = 8,
+  TBLIDXB1_UN_0_SHUN_0_OPCODE_X0 = 9,
+  TBLIDXB1_UN_0_SHUN_0_OPCODE_Y0 = 9,
+  TBLIDXB2_UN_0_SHUN_0_OPCODE_X0 = 10,
+  TBLIDXB2_UN_0_SHUN_0_OPCODE_Y0 = 10,
+  TBLIDXB3_UN_0_SHUN_0_OPCODE_X0 = 11,
+  TBLIDXB3_UN_0_SHUN_0_OPCODE_Y0 = 11,
+  TNS_UN_0_SHUN_0_OPCODE_X1 = 22,
+  UN_0_SHUN_0_OPCODE_X0 = 11,
+  UN_0_SHUN_0_OPCODE_X1 = 11,
+  UN_0_SHUN_0_OPCODE_Y0 = 5,
+  UN_0_SHUN_0_OPCODE_Y1 = 5,
+  WH64_UN_0_SHUN_0_OPCODE_X1 = 23,
+  XORI_IMM_0_OPCODE_X0 = 2,
+  XORI_IMM_0_OPCODE_X1 = 21,
+  XOR_SPECIAL_0_OPCODE_X0 = 94,
+  XOR_SPECIAL_0_OPCODE_X1 = 65,
+  XOR_SPECIAL_2_OPCODE_Y0 = 3,
+  XOR_SPECIAL_2_OPCODE_Y1 = 3
+};
+
+#endif /* !_TILE_OPCODE_CONSTANTS_H */
diff --git a/arch/tile/include/asm/page.h b/arch/tile/include/asm/page.h
new file mode 100644
index 0000000..c8301c4
--- /dev/null
+++ b/arch/tile/include/asm/page.h
@@ -0,0 +1,334 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_PAGE_H
+#define _ASM_TILE_PAGE_H
+
+#include <linux/const.h>
+#include <hv/hypervisor.h>
+#include <arch/chip.h>
+
+/* PAGE_SHIFT and HPAGE_SHIFT determine the page sizes. */
+#define PAGE_SHIFT	16
+#define HPAGE_SHIFT	24
+
+#define PAGE_SIZE	(_AC(1, UL) << PAGE_SHIFT)
+#define HPAGE_SIZE	(_AC(1, UL) << HPAGE_SHIFT)
+
+#define PAGE_MASK	(~(PAGE_SIZE - 1))
+#define HPAGE_MASK	(~(HPAGE_SIZE - 1))
+
+/*
+ * The {,H}PAGE_SHIFT values must match the HV_LOG2_PAGE_SIZE_xxx
+ * definitions in <hv/hypervisor.h>.  We validate this at build time
+ * here, and again at runtime during early boot.  We provide a
+ * separate definition since userspace doesn't have <hv/hypervisor.h>.
+ *
+ * Be careful to distinguish PAGE_SHIFT from HV_PTE_INDEX_PFN, since
+ * they are the same on i386 but not TILE.
+ */
+#if HV_LOG2_PAGE_SIZE_SMALL != PAGE_SHIFT
+# error Small page size mismatch in Linux
+#endif
+#if HV_LOG2_PAGE_SIZE_LARGE != HPAGE_SHIFT
+# error Huge page size mismatch in Linux
+#endif
+
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+#include <linux/string.h>
+
+struct page;
+
+static inline void clear_page(void *page)
+{
+	memset(page, 0, PAGE_SIZE);
+}
+
+static inline void copy_page(void *to, void *from)
+{
+	memcpy(to, from, PAGE_SIZE);
+}
+
+static inline void clear_user_page(void *page, unsigned long vaddr,
+				struct page *pg)
+{
+	clear_page(page);
+}
+
+static inline void copy_user_page(void *to, void *from, unsigned long vaddr,
+				struct page *topage)
+{
+	copy_page(to, from);
+}
+
+/*
+ * Hypervisor page tables are made of the same basic structure.
+ */
+
+typedef __u64 pteval_t;
+typedef __u64 pmdval_t;
+typedef __u64 pudval_t;
+typedef __u64 pgdval_t;
+typedef __u64 pgprotval_t;
+
+typedef HV_PTE pte_t;
+typedef HV_PTE pgd_t;
+typedef HV_PTE pgprot_t;
+
+/*
+ * User L2 page tables are managed as one L2 page table per page,
+ * because we use the page allocator for them.  This keeps the allocation
+ * simple and makes it potentially useful to implement HIGHPTE at some point.
+ * However, it's also inefficient, since L2 page tables are much smaller
+ * than pages (currently 2KB vs 64KB).  So we should revisit this.
+ */
+typedef struct page *pgtable_t;
+
+/* Must be a macro since it is used to create constants. */
+#define __pgprot(val) hv_pte(val)
+
+static inline u64 pgprot_val(pgprot_t pgprot)
+{
+	return hv_pte_val(pgprot);
+}
+
+static inline u64 pte_val(pte_t pte)
+{
+	return hv_pte_val(pte);
+}
+
+static inline u64 pgd_val(pgd_t pgd)
+{
+	return hv_pte_val(pgd);
+}
+
+#ifdef __tilegx__
+
+typedef HV_PTE pmd_t;
+
+static inline u64 pmd_val(pmd_t pmd)
+{
+	return hv_pte_val(pmd);
+}
+
+#endif
+
+#endif /* !__ASSEMBLY__ */
+
+#define HUGETLB_PAGE_ORDER	(HPAGE_SHIFT - PAGE_SHIFT)
+
+#define HUGE_MAX_HSTATE		2
+
+#ifdef CONFIG_HUGETLB_PAGE
+#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
+#endif
+
+/* Each memory controller has PAs distinct in their high bits. */
+#define NR_PA_HIGHBIT_SHIFT (CHIP_PA_WIDTH() - CHIP_LOG_NUM_MSHIMS())
+#define NR_PA_HIGHBIT_VALUES (1 << CHIP_LOG_NUM_MSHIMS())
+#define __pa_to_highbits(pa) ((phys_addr_t)(pa) >> NR_PA_HIGHBIT_SHIFT)
+#define __pfn_to_highbits(pfn) ((pfn) >> (NR_PA_HIGHBIT_SHIFT - PAGE_SHIFT))
+
+#ifdef __tilegx__
+
+/*
+ * We reserve the lower half of memory for user-space programs, and the
+ * upper half for system code.  We re-map all of physical memory in the
+ * upper half, which takes a quarter of our VA space.  Then we have
+ * the vmalloc regions.  The supervisor code lives at 0xfffffff700000000,
+ * with the hypervisor above that.
+ *
+ * Loadable kernel modules are placed immediately after the static
+ * supervisor code, with each being allocated a 256MB region of
+ * address space, so we don't have to worry about the range of "jal"
+ * and other branch instructions.
+ *
+ * For now we keep life simple and just allocate one pmd (4GB) for vmalloc.
+ * Similarly, for now we don't play any struct page mapping games.
+ */
+
+#if CHIP_PA_WIDTH() + 2 > CHIP_VA_WIDTH()
+# error Too much PA to map with the VA available!
+#endif
+#define HALF_VA_SPACE           (_AC(1, UL) << (CHIP_VA_WIDTH() - 1))
+
+#define MEM_LOW_END		(HALF_VA_SPACE - 1)         /* low half */
+#define MEM_HIGH_START		(-HALF_VA_SPACE)            /* high half */
+#define PAGE_OFFSET		MEM_HIGH_START
+#define _VMALLOC_START		_AC(0xfffffff500000000, UL) /* 4 GB */
+#define HUGE_VMAP_BASE		_AC(0xfffffff600000000, UL) /* 4 GB */
+#define MEM_SV_START		_AC(0xfffffff700000000, UL) /* 256 MB */
+#define MEM_SV_INTRPT		MEM_SV_START
+#define MEM_MODULE_START	_AC(0xfffffff710000000, UL) /* 256 MB */
+#define MEM_MODULE_END		(MEM_MODULE_START + (256*1024*1024))
+#define MEM_HV_START		_AC(0xfffffff800000000, UL) /* 32 GB */
+
+/* Highest DTLB address we will use */
+#define KERNEL_HIGH_VADDR	MEM_SV_START
+
+/* Since we don't currently provide any fixmaps, we use an impossible VA. */
+#define FIXADDR_TOP             MEM_HV_START
+
+#else /* !__tilegx__ */
+
+/*
+ * A PAGE_OFFSET of 0xC0000000 means that the kernel has
+ * a virtual address space of one gigabyte, which limits the
+ * amount of physical memory you can use to about 768MB.
+ * If you want more physical memory than this then see the CONFIG_HIGHMEM
+ * option in the kernel configuration.
+ *
+ * The top two 16MB chunks in the table below (VIRT and HV) are
+ * unavailable to Linux.  Since the kernel interrupt vectors must live
+ * at 0xfd000000, we map all of the bottom of RAM at this address with
+ * a huge page table entry to minimize its ITLB footprint (as well as
+ * at PAGE_OFFSET).  The last architected requirement is that user
+ * interrupt vectors live at 0xfc000000, so we make that range of
+ * memory available to user processes.  The remaining regions are sized
+ * as shown; after the first four addresses, we show "typical" values,
+ * since the actual addresses depend on kernel #defines.
+ *
+ * MEM_VIRT_INTRPT                 0xff000000
+ * MEM_HV_INTRPT                   0xfe000000
+ * MEM_SV_INTRPT (kernel code)     0xfd000000
+ * MEM_USER_INTRPT (user vector)   0xfc000000
+ * FIX_KMAP_xxx                    0xf8000000 (via NR_CPUS * KM_TYPE_NR)
+ * PKMAP_BASE                      0xf7000000 (via LAST_PKMAP)
+ * HUGE_VMAP                       0xf3000000 (via CONFIG_NR_HUGE_VMAPS)
+ * VMALLOC_START                   0xf0000000 (via __VMALLOC_RESERVE)
+ * mapped LOWMEM                   0xc0000000
+ */
+
+#define MEM_USER_INTRPT		_AC(0xfc000000, UL)
+#define MEM_SV_INTRPT		_AC(0xfd000000, UL)
+#define MEM_HV_INTRPT		_AC(0xfe000000, UL)
+#define MEM_VIRT_INTRPT		_AC(0xff000000, UL)
+
+#define INTRPT_SIZE		0x4000
+
+/* Tolerate page size larger than the architecture interrupt region size. */
+#if PAGE_SIZE > INTRPT_SIZE
+#undef INTRPT_SIZE
+#define INTRPT_SIZE PAGE_SIZE
+#endif
+
+#define KERNEL_HIGH_VADDR	MEM_USER_INTRPT
+#define FIXADDR_TOP		(KERNEL_HIGH_VADDR - PAGE_SIZE)
+
+#define PAGE_OFFSET		_AC(CONFIG_PAGE_OFFSET, UL)
+
+/* On 32-bit architectures we mix kernel modules in with other vmaps. */
+#define MEM_MODULE_START	VMALLOC_START
+#define MEM_MODULE_END		VMALLOC_END
+
+#endif /* __tilegx__ */
+
+#ifndef __ASSEMBLY__
+
+#ifdef CONFIG_HIGHMEM
+
+/* Map kernel virtual addresses to page frames, in HPAGE_SIZE chunks. */
+extern unsigned long pbase_map[];
+extern void *vbase_map[];
+
+static inline unsigned long kaddr_to_pfn(const volatile void *_kaddr)
+{
+	unsigned long kaddr = (unsigned long)_kaddr;
+	return pbase_map[kaddr >> HPAGE_SHIFT] +
+		((kaddr & (HPAGE_SIZE - 1)) >> PAGE_SHIFT);
+}
+
+static inline void *pfn_to_kaddr(unsigned long pfn)
+{
+	return vbase_map[__pfn_to_highbits(pfn)] + (pfn << PAGE_SHIFT);
+}
+
+static inline phys_addr_t virt_to_phys(const volatile void *kaddr)
+{
+	unsigned long pfn = kaddr_to_pfn(kaddr);
+	return ((phys_addr_t)pfn << PAGE_SHIFT) +
+		((unsigned long)kaddr & (PAGE_SIZE-1));
+}
+
+static inline void *phys_to_virt(phys_addr_t paddr)
+{
+	return pfn_to_kaddr(paddr >> PAGE_SHIFT) + (paddr & (PAGE_SIZE-1));
+}
+
+/* With HIGHMEM, we pack PAGE_OFFSET through high_memory with all valid VAs. */
+static inline int virt_addr_valid(const volatile void *kaddr)
+{
+	extern void *high_memory;  /* copied from <linux/mm.h> */
+	return ((unsigned long)kaddr >= PAGE_OFFSET && kaddr < high_memory);
+}
+
+#else /* !CONFIG_HIGHMEM */
+
+static inline unsigned long kaddr_to_pfn(const volatile void *kaddr)
+{
+	return ((unsigned long)kaddr - PAGE_OFFSET) >> PAGE_SHIFT;
+}
+
+static inline void *pfn_to_kaddr(unsigned long pfn)
+{
+	return (void *)((pfn << PAGE_SHIFT) + PAGE_OFFSET);
+}
+
+static inline phys_addr_t virt_to_phys(const volatile void *kaddr)
+{
+	return (phys_addr_t)((unsigned long)kaddr - PAGE_OFFSET);
+}
+
+static inline void *phys_to_virt(phys_addr_t paddr)
+{
+	return (void *)((unsigned long)paddr + PAGE_OFFSET);
+}
+
+/* Check that the given address is within some mapped range of PAs. */
+#define virt_addr_valid(kaddr) pfn_valid(kaddr_to_pfn(kaddr))
+
+#endif /* !CONFIG_HIGHMEM */
+
+/* All callers are not consistent in how they call these functions. */
+#define __pa(kaddr) virt_to_phys((void *)(unsigned long)(kaddr))
+#define __va(paddr) phys_to_virt((phys_addr_t)(paddr))
+
+extern int devmem_is_allowed(unsigned long pagenr);
+
+#ifdef CONFIG_FLATMEM
+static inline int pfn_valid(unsigned long pfn)
+{
+	return pfn < max_mapnr;
+}
+#endif
+
+/* Provide as macros since these require some other headers included. */
+#define page_to_pa(page) ((phys_addr_t)(page_to_pfn(page)) << PAGE_SHIFT)
+#define virt_to_page(kaddr) pfn_to_page(kaddr_to_pfn(kaddr))
+#define page_to_virt(page) pfn_to_kaddr(page_to_pfn(page))
+
+struct mm_struct;
+extern pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr);
+
+#endif /* !__ASSEMBLY__ */
+
+#define VM_DATA_DEFAULT_FLAGS \
+	(VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+#include <asm-generic/memory_model.h>
+#include <asm-generic/getorder.h>
+
+#endif /* _ASM_TILE_PAGE_H */
diff --git a/arch/tile/include/asm/param.h b/arch/tile/include/asm/param.h
new file mode 100644
index 0000000..965d454
--- /dev/null
+++ b/arch/tile/include/asm/param.h
@@ -0,0 +1 @@
+#include <asm-generic/param.h>
diff --git a/arch/tile/include/asm/pci-bridge.h b/arch/tile/include/asm/pci-bridge.h
new file mode 100644
index 0000000..e853b0e
--- /dev/null
+++ b/arch/tile/include/asm/pci-bridge.h
@@ -0,0 +1,117 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_PCI_BRIDGE_H
+#define _ASM_TILE_PCI_BRIDGE_H
+
+#include <linux/ioport.h>
+#include <linux/pci.h>
+
+struct device_node;
+struct pci_controller;
+
+/*
+ * pci_io_base returns the memory address at which you can access
+ * the I/O space for PCI bus number `bus' (or NULL on error).
+ */
+extern void __iomem *pci_bus_io_base(unsigned int bus);
+extern unsigned long pci_bus_io_base_phys(unsigned int bus);
+extern unsigned long pci_bus_mem_base_phys(unsigned int bus);
+
+/* Allocate a new PCI host bridge structure */
+extern struct pci_controller *pcibios_alloc_controller(void);
+
+/* Helper function for setting up resources */
+extern void pci_init_resource(struct resource *res, unsigned long start,
+			      unsigned long end, int flags, char *name);
+
+/* Get the PCI host controller for a bus */
+extern struct pci_controller *pci_bus_to_hose(int bus);
+
+/*
+ * Structure of a PCI controller (host bridge)
+ */
+struct pci_controller {
+	int index;		/* PCI domain number */
+	struct pci_bus *root_bus;
+
+	int first_busno;
+	int last_busno;
+
+	int hv_cfg_fd[2];	/* config{0,1} fds for this PCIe controller */
+	int hv_mem_fd;		/* fd to Hypervisor for MMIO operations */
+
+	struct pci_ops *ops;
+
+	int irq_base;		/* Base IRQ from the Hypervisor	*/
+	int plx_gen1;		/* flag for PLX Gen 1 configuration */
+
+	/* Address ranges that are routed to this controller/bridge. */
+	struct resource mem_resources[3];
+};
+
+static inline struct pci_controller *pci_bus_to_host(struct pci_bus *bus)
+{
+	return bus->sysdata;
+}
+
+extern void setup_indirect_pci_nomap(struct pci_controller *hose,
+			       void __iomem *cfg_addr, void __iomem *cfg_data);
+extern void setup_indirect_pci(struct pci_controller *hose,
+			       u32 cfg_addr, u32 cfg_data);
+extern void setup_grackle(struct pci_controller *hose);
+
+extern unsigned char common_swizzle(struct pci_dev *, unsigned char *);
+
+/*
+ *   The following code swizzles for exactly one bridge.  The routine
+ *   common_swizzle below handles multiple bridges.  But there are a
+ *   some boards that don't follow the PCI spec's suggestion so we
+ *   break this piece out separately.
+ */
+static inline unsigned char bridge_swizzle(unsigned char pin,
+		unsigned char idsel)
+{
+	return (((pin-1) + idsel) % 4) + 1;
+}
+
+/*
+ * The following macro is used to lookup irqs in a standard table
+ * format for those PPC systems that do not already have PCI
+ * interrupts properly routed.
+ */
+/* FIXME - double check this */
+#define PCI_IRQ_TABLE_LOOKUP ({ \
+	long _ctl_ = -1; \
+	if (idsel >= min_idsel && idsel <= max_idsel && pin <= irqs_per_slot) \
+		_ctl_ = pci_irq_table[idsel - min_idsel][pin-1]; \
+	_ctl_; \
+})
+
+/*
+ * Scan the buses below a given PCI host bridge and assign suitable
+ * resources to all devices found.
+ */
+extern int pciauto_bus_scan(struct pci_controller *, int);
+
+#ifdef CONFIG_PCI
+extern unsigned long pci_address_to_pio(phys_addr_t address);
+#else
+static inline unsigned long pci_address_to_pio(phys_addr_t address)
+{
+	return (unsigned long)-1;
+}
+#endif
+
+#endif /* _ASM_TILE_PCI_BRIDGE_H */
diff --git a/arch/tile/include/asm/pci.h b/arch/tile/include/asm/pci.h
new file mode 100644
index 0000000..b0c15da
--- /dev/null
+++ b/arch/tile/include/asm/pci.h
@@ -0,0 +1,128 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_PCI_H
+#define _ASM_TILE_PCI_H
+
+#include <asm/pci-bridge.h>
+
+/*
+ * The hypervisor maps the entirety of CPA-space as bus addresses, so
+ * bus addresses are physical addresses.  The networking and block
+ * device layers use this boolean for bounce buffer decisions.
+ */
+#define PCI_DMA_BUS_IS_PHYS     1
+
+struct pci_controller *pci_bus_to_hose(int bus);
+unsigned char __init common_swizzle(struct pci_dev *dev, unsigned char *pinp);
+int __init tile_pci_init(void);
+void pci_iounmap(struct pci_dev *dev, void __iomem *addr);
+void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max);
+void __devinit pcibios_fixup_bus(struct pci_bus *bus);
+
+int __devinit _tile_cfg_read(struct pci_controller *hose,
+				    int bus,
+				    int slot,
+				    int function,
+				    int offset,
+				    int size,
+				    u32 *val);
+int __devinit _tile_cfg_write(struct pci_controller *hose,
+				     int bus,
+				     int slot,
+				     int function,
+				     int offset,
+				     int size,
+				     u32 val);
+
+/*
+ * These are used to to config reads and writes in the early stages of
+ * setup before the driver infrastructure has been set up enough to be
+ * able to do config reads and writes.
+ */
+#define early_cfg_read(where, size, value) \
+	_tile_cfg_read(controller, \
+		       current_bus, \
+		       pci_slot, \
+		       pci_fn, \
+		       where, \
+		       size, \
+		       value)
+
+#define early_cfg_write(where, size, value) \
+	_tile_cfg_write(controller, \
+		       current_bus, \
+		       pci_slot, \
+		       pci_fn, \
+		       where, \
+		       size, \
+		       value)
+
+
+
+#define PCICFG_BYTE	1
+#define PCICFG_WORD	2
+#define PCICFG_DWORD	4
+
+#define	TILE_NUM_PCIE	2
+
+#define pci_domain_nr(bus) (((struct pci_controller *)(bus)->sysdata)->index)
+
+/*
+ * This decides whether to display the domain number in /proc.
+ */
+static inline int pci_proc_domain(struct pci_bus *bus)
+{
+	return 1;
+}
+
+/*
+ * I/O space is currently not supported.
+ */
+
+#define TILE_PCIE_LOWER_IO		0x0
+#define TILE_PCIE_UPPER_IO		0x10000
+#define TILE_PCIE_PCIE_IO_SIZE		0x0000FFFF
+
+#define _PAGE_NO_CACHE		0
+#define _PAGE_GUARDED		0
+
+
+#define pcibios_assign_all_busses()    pci_assign_all_buses
+extern int pci_assign_all_buses;
+
+static inline void pcibios_set_master(struct pci_dev *dev)
+{
+	/* No special bus mastering setup handling */
+}
+
+#define PCIBIOS_MIN_MEM		0
+#define PCIBIOS_MIN_IO		TILE_PCIE_LOWER_IO
+
+/*
+ * This flag tells if the platform is TILEmpower that needs
+ * special configuration for the PLX switch chip.
+ */
+extern int blade_pci;
+
+/* implement the pci_ DMA API in terms of the generic device dma_ one */
+#include <asm-generic/pci-dma-compat.h>
+
+/* generic pci stuff */
+#include <asm-generic/pci.h>
+
+/* Use any cpu for PCI. */
+#define cpumask_of_pcibus(bus) cpu_online_mask
+
+#endif /* _ASM_TILE_PCI_H */
diff --git a/arch/tile/include/asm/percpu.h b/arch/tile/include/asm/percpu.h
new file mode 100644
index 0000000..63294f5
--- /dev/null
+++ b/arch/tile/include/asm/percpu.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_PERCPU_H
+#define _ASM_TILE_PERCPU_H
+
+register unsigned long __my_cpu_offset __asm__("tp");
+#define __my_cpu_offset __my_cpu_offset
+#define set_my_cpu_offset(tp) (__my_cpu_offset = (tp))
+
+#include <asm-generic/percpu.h>
+
+#endif /* _ASM_TILE_PERCPU_H */
diff --git a/arch/tile/include/asm/pgalloc.h b/arch/tile/include/asm/pgalloc.h
new file mode 100644
index 0000000..cf52791
--- /dev/null
+++ b/arch/tile/include/asm/pgalloc.h
@@ -0,0 +1,119 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_PGALLOC_H
+#define _ASM_TILE_PGALLOC_H
+
+#include <linux/threads.h>
+#include <linux/mm.h>
+#include <linux/mmzone.h>
+#include <asm/fixmap.h>
+#include <hv/hypervisor.h>
+
+/* Bits for the size of the second-level page table. */
+#define L2_KERNEL_PGTABLE_SHIFT \
+  (HV_LOG2_PAGE_SIZE_LARGE - HV_LOG2_PAGE_SIZE_SMALL + HV_LOG2_PTE_SIZE)
+
+/* We currently allocate user L2 page tables by page (unlike kernel L2s). */
+#if L2_KERNEL_PGTABLE_SHIFT < HV_LOG2_PAGE_SIZE_SMALL
+#define L2_USER_PGTABLE_SHIFT HV_LOG2_PAGE_SIZE_SMALL
+#else
+#define L2_USER_PGTABLE_SHIFT L2_KERNEL_PGTABLE_SHIFT
+#endif
+
+/* How many pages do we need, as an "order", for a user L2 page table? */
+#define L2_USER_PGTABLE_ORDER (L2_USER_PGTABLE_SHIFT - HV_LOG2_PAGE_SIZE_SMALL)
+
+/* How big is a kernel L2 page table? */
+#define L2_KERNEL_PGTABLE_SIZE (1 << L2_KERNEL_PGTABLE_SHIFT)
+
+static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
+{
+#ifdef CONFIG_64BIT
+	set_pte_order(pmdp, pmd, L2_USER_PGTABLE_ORDER);
+#else
+	set_pte_order(&pmdp->pud.pgd, pmd.pud.pgd, L2_USER_PGTABLE_ORDER);
+#endif
+}
+
+static inline void pmd_populate_kernel(struct mm_struct *mm,
+				       pmd_t *pmd, pte_t *ptep)
+{
+	set_pmd(pmd, ptfn_pmd(__pa(ptep) >> HV_LOG2_PAGE_TABLE_ALIGN,
+			      __pgprot(_PAGE_PRESENT)));
+}
+
+static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
+				pgtable_t page)
+{
+	set_pmd(pmd, ptfn_pmd(HV_PFN_TO_PTFN(page_to_pfn(page)),
+			      __pgprot(_PAGE_PRESENT)));
+}
+
+/*
+ * Allocate and free page tables.
+ */
+
+extern pgd_t *pgd_alloc(struct mm_struct *mm);
+extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
+
+extern pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address);
+extern void pte_free(struct mm_struct *mm, struct page *pte);
+
+#define pmd_pgtable(pmd) pmd_page(pmd)
+
+static inline pte_t *
+pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+{
+	return pfn_to_kaddr(page_to_pfn(pte_alloc_one(mm, address)));
+}
+
+static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
+{
+	BUG_ON((unsigned long)pte & (PAGE_SIZE-1));
+	pte_free(mm, virt_to_page(pte));
+}
+
+extern void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte,
+			   unsigned long address);
+
+#define check_pgt_cache()	do { } while (0)
+
+/*
+ * Get the small-page pte_t lowmem entry for a given pfn.
+ * This may or may not be in use, depending on whether the initial
+ * huge-page entry for the page has already been shattered.
+ */
+pte_t *get_prealloc_pte(unsigned long pfn);
+
+/* During init, we can shatter kernel huge pages if needed. */
+void shatter_pmd(pmd_t *pmd);
+
+#ifdef __tilegx__
+/* We share a single page allocator for both L1 and L2 page tables. */
+#if HV_L1_SIZE != HV_L2_SIZE
+# error Rework assumption that L1 and L2 page tables are same size.
+#endif
+#define L1_USER_PGTABLE_ORDER L2_USER_PGTABLE_ORDER
+#define pud_populate(mm, pud, pmd) \
+  pmd_populate_kernel((mm), (pmd_t *)(pud), (pte_t *)(pmd))
+#define pmd_alloc_one(mm, addr) \
+  ((pmd_t *)page_to_virt(pte_alloc_one((mm), (addr))))
+#define pmd_free(mm, pmdp) \
+  pte_free((mm), virt_to_page(pmdp))
+#define __pmd_free_tlb(tlb, pmdp, address) \
+  __pte_free_tlb((tlb), virt_to_page(pmdp), (address))
+#endif
+
+#endif /* _ASM_TILE_PGALLOC_H */
diff --git a/arch/tile/include/asm/pgtable.h b/arch/tile/include/asm/pgtable.h
new file mode 100644
index 0000000..beb1504
--- /dev/null
+++ b/arch/tile/include/asm/pgtable.h
@@ -0,0 +1,475 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ * This file contains the functions and defines necessary to modify and use
+ * the TILE page table tree.
+ */
+
+#ifndef _ASM_TILE_PGTABLE_H
+#define _ASM_TILE_PGTABLE_H
+
+#include <hv/hypervisor.h>
+
+#ifndef __ASSEMBLY__
+
+#include <linux/bitops.h>
+#include <linux/threads.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <asm/processor.h>
+#include <asm/fixmap.h>
+#include <asm/system.h>
+
+struct mm_struct;
+struct vm_area_struct;
+
+/*
+ * ZERO_PAGE is a global shared page that is always zero: used
+ * for zero-mapped memory areas etc..
+ */
+extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)];
+#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
+
+extern pgd_t swapper_pg_dir[];
+extern pgprot_t swapper_pgprot;
+extern struct kmem_cache *pgd_cache;
+extern spinlock_t pgd_lock;
+extern struct list_head pgd_list;
+
+/*
+ * The very last slots in the pgd_t are for addresses unusable by Linux
+ * (pgd_addr_invalid() returns true).  So we use them for the list structure.
+ * The x86 code we are modelled on uses the page->private/index fields
+ * (older 2.6 kernels) or the lru list (newer 2.6 kernels), but since
+ * our pgds are so much smaller than a page, it seems a waste to
+ * spend a whole page on each pgd.
+ */
+#define PGD_LIST_OFFSET \
+  ((PTRS_PER_PGD * sizeof(pgd_t)) - sizeof(struct list_head))
+#define pgd_to_list(pgd) \
+  ((struct list_head *)((char *)(pgd) + PGD_LIST_OFFSET))
+#define list_to_pgd(list) \
+  ((pgd_t *)((char *)(list) - PGD_LIST_OFFSET))
+
+extern void pgtable_cache_init(void);
+extern void paging_init(void);
+extern void set_page_homes(void);
+
+#define FIRST_USER_ADDRESS	0
+
+#define _PAGE_PRESENT           HV_PTE_PRESENT
+#define _PAGE_HUGE_PAGE         HV_PTE_PAGE
+#define _PAGE_READABLE          HV_PTE_READABLE
+#define _PAGE_WRITABLE          HV_PTE_WRITABLE
+#define _PAGE_EXECUTABLE        HV_PTE_EXECUTABLE
+#define _PAGE_ACCESSED          HV_PTE_ACCESSED
+#define _PAGE_DIRTY             HV_PTE_DIRTY
+#define _PAGE_GLOBAL            HV_PTE_GLOBAL
+#define _PAGE_USER              HV_PTE_USER
+
+/*
+ * All the "standard" bits.  Cache-control bits are managed elsewhere.
+ * This is used to test for valid level-2 page table pointers by checking
+ * all the bits, and to mask away the cache control bits for mprotect.
+ */
+#define _PAGE_ALL (\
+  _PAGE_PRESENT | \
+  _PAGE_HUGE_PAGE | \
+  _PAGE_READABLE | \
+  _PAGE_WRITABLE | \
+  _PAGE_EXECUTABLE | \
+  _PAGE_ACCESSED | \
+  _PAGE_DIRTY | \
+  _PAGE_GLOBAL | \
+  _PAGE_USER \
+)
+
+#define PAGE_NONE \
+	__pgprot(_PAGE_PRESENT | _PAGE_ACCESSED)
+#define PAGE_SHARED \
+	__pgprot(_PAGE_PRESENT | _PAGE_READABLE | _PAGE_WRITABLE | \
+		 _PAGE_USER | _PAGE_ACCESSED)
+
+#define PAGE_SHARED_EXEC \
+	__pgprot(_PAGE_PRESENT | _PAGE_READABLE | _PAGE_WRITABLE | \
+		 _PAGE_EXECUTABLE | _PAGE_USER | _PAGE_ACCESSED)
+#define PAGE_COPY_NOEXEC \
+	__pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_READABLE)
+#define PAGE_COPY_EXEC \
+	__pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | \
+		 _PAGE_READABLE | _PAGE_EXECUTABLE)
+#define PAGE_COPY \
+	PAGE_COPY_NOEXEC
+#define PAGE_READONLY \
+	__pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_READABLE)
+#define PAGE_READONLY_EXEC \
+	__pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | \
+		 _PAGE_READABLE | _PAGE_EXECUTABLE)
+
+#define _PAGE_KERNEL_RO \
+ (_PAGE_PRESENT | _PAGE_GLOBAL | _PAGE_READABLE | _PAGE_ACCESSED)
+#define _PAGE_KERNEL \
+ (_PAGE_KERNEL_RO | _PAGE_WRITABLE | _PAGE_DIRTY)
+#define _PAGE_KERNEL_EXEC       (_PAGE_KERNEL_RO | _PAGE_EXECUTABLE)
+
+#define PAGE_KERNEL		__pgprot(_PAGE_KERNEL)
+#define PAGE_KERNEL_RO		__pgprot(_PAGE_KERNEL_RO)
+#define PAGE_KERNEL_EXEC	__pgprot(_PAGE_KERNEL_EXEC)
+
+#define page_to_kpgprot(p) PAGE_KERNEL
+
+/*
+ * We could tighten these up, but for now writable or executable
+ * implies readable.
+ */
+#define __P000	PAGE_NONE
+#define __P001	PAGE_READONLY
+#define __P010	PAGE_COPY      /* this is write-only, which we won't support */
+#define __P011	PAGE_COPY
+#define __P100	PAGE_READONLY_EXEC
+#define __P101	PAGE_READONLY_EXEC
+#define __P110	PAGE_COPY_EXEC
+#define __P111	PAGE_COPY_EXEC
+
+#define __S000	PAGE_NONE
+#define __S001	PAGE_READONLY
+#define __S010	PAGE_SHARED
+#define __S011	PAGE_SHARED
+#define __S100	PAGE_READONLY_EXEC
+#define __S101	PAGE_READONLY_EXEC
+#define __S110	PAGE_SHARED_EXEC
+#define __S111	PAGE_SHARED_EXEC
+
+/*
+ * All the normal _PAGE_ALL bits are ignored for PMDs, except PAGE_PRESENT
+ * and PAGE_HUGE_PAGE, which must be one and zero, respectively.
+ * We set the ignored bits to zero.
+ */
+#define _PAGE_TABLE     _PAGE_PRESENT
+
+/* Inherit the caching flags from the old protection bits. */
+#define pgprot_modify(oldprot, newprot) \
+  (pgprot_t) { ((oldprot).val & ~_PAGE_ALL) | (newprot).val }
+
+/* Just setting the PFN to zero suffices. */
+#define pte_pgprot(x) hv_pte_set_pfn((x), 0)
+
+/*
+ * For PTEs and PDEs, we must clear the Present bit first when
+ * clearing a page table entry, so clear the bottom half first and
+ * enforce ordering with a barrier.
+ */
+static inline void __pte_clear(pte_t *ptep)
+{
+#ifdef __tilegx__
+	ptep->val = 0;
+#else
+	u32 *tmp = (u32 *)ptep;
+	tmp[0] = 0;
+	barrier();
+	tmp[1] = 0;
+#endif
+}
+#define pte_clear(mm, addr, ptep) __pte_clear(ptep)
+
+/*
+ * The following only work if pte_present() is true.
+ * Undefined behaviour if not..
+ */
+#define pte_present hv_pte_get_present
+#define pte_user hv_pte_get_user
+#define pte_read hv_pte_get_readable
+#define pte_dirty hv_pte_get_dirty
+#define pte_young hv_pte_get_accessed
+#define pte_write hv_pte_get_writable
+#define pte_exec hv_pte_get_executable
+#define pte_huge hv_pte_get_page
+#define pte_rdprotect hv_pte_clear_readable
+#define pte_exprotect hv_pte_clear_executable
+#define pte_mkclean hv_pte_clear_dirty
+#define pte_mkold hv_pte_clear_accessed
+#define pte_wrprotect hv_pte_clear_writable
+#define pte_mksmall hv_pte_clear_page
+#define pte_mkread hv_pte_set_readable
+#define pte_mkexec hv_pte_set_executable
+#define pte_mkdirty hv_pte_set_dirty
+#define pte_mkyoung hv_pte_set_accessed
+#define pte_mkwrite hv_pte_set_writable
+#define pte_mkhuge hv_pte_set_page
+
+#define pte_special(pte) 0
+#define pte_mkspecial(pte) (pte)
+
+/*
+ * Use some spare bits in the PTE for user-caching tags.
+ */
+#define pte_set_forcecache hv_pte_set_client0
+#define pte_get_forcecache hv_pte_get_client0
+#define pte_clear_forcecache hv_pte_clear_client0
+#define pte_set_anyhome hv_pte_set_client1
+#define pte_get_anyhome hv_pte_get_client1
+#define pte_clear_anyhome hv_pte_clear_client1
+
+/*
+ * A migrating PTE has PAGE_PRESENT clear but all the other bits preserved.
+ */
+#define pte_migrating hv_pte_get_migrating
+#define pte_mkmigrate(x) hv_pte_set_migrating(hv_pte_clear_present(x))
+#define pte_donemigrate(x) hv_pte_set_present(hv_pte_clear_migrating(x))
+
+#define pte_ERROR(e) \
+	printk("%s:%d: bad pte 0x%016llx.\n", __FILE__, __LINE__, pte_val(e))
+#define pgd_ERROR(e) \
+	printk("%s:%d: bad pgd 0x%016llx.\n", __FILE__, __LINE__, pgd_val(e))
+
+/*
+ * set_pte_order() sets the given PTE and also sanity-checks the
+ * requested PTE against the page homecaching.  Unspecified parts
+ * of the PTE are filled in when it is written to memory, i.e. all
+ * caching attributes if "!forcecache", or the home cpu if "anyhome".
+ */
+extern void set_pte_order(pte_t *ptep, pte_t pte, int order);
+
+#define set_pte(ptep, pteval) set_pte_order(ptep, pteval, 0)
+#define set_pte_at(mm, addr, ptep, pteval) set_pte(ptep, pteval)
+#define set_pte_atomic(pteptr, pteval) set_pte(pteptr, pteval)
+
+#define pte_page(x)		pfn_to_page(pte_pfn(x))
+
+static inline int pte_none(pte_t pte)
+{
+	return !pte.val;
+}
+
+static inline unsigned long pte_pfn(pte_t pte)
+{
+	return hv_pte_get_pfn(pte);
+}
+
+/* Set or get the remote cache cpu in a pgprot with remote caching. */
+extern pgprot_t set_remote_cache_cpu(pgprot_t prot, int cpu);
+extern int get_remote_cache_cpu(pgprot_t prot);
+
+static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot)
+{
+	return hv_pte_set_pfn(prot, pfn);
+}
+
+/* Support for priority mappings. */
+extern void start_mm_caching(struct mm_struct *mm);
+extern void check_mm_caching(struct mm_struct *prev, struct mm_struct *next);
+
+/*
+ * Support non-linear file mappings (see sys_remap_file_pages).
+ * This is defined by CLIENT1 set but CLIENT0 and _PAGE_PRESENT clear, and the
+ * file offset in the 32 high bits.
+ */
+#define _PAGE_FILE        HV_PTE_CLIENT1
+#define PTE_FILE_MAX_BITS 32
+#define pte_file(pte)     (hv_pte_get_client1(pte) && !hv_pte_get_client0(pte))
+#define pte_to_pgoff(pte) ((pte).val >> 32)
+#define pgoff_to_pte(off) ((pte_t) { (((long long)(off)) << 32) | _PAGE_FILE })
+
+/*
+ * Encode and de-code a swap entry (see <linux/swapops.h>).
+ * We put the swap file type+offset in the 32 high bits;
+ * I believe we can just leave the low bits clear.
+ */
+#define __swp_type(swp)		((swp).val & 0x1f)
+#define __swp_offset(swp)	((swp).val >> 5)
+#define __swp_entry(type, off)	((swp_entry_t) { (type) | ((off) << 5) })
+#define __pte_to_swp_entry(pte)	((swp_entry_t) { (pte).val >> 32 })
+#define __swp_entry_to_pte(swp)	((pte_t) { (((long long) ((swp).val)) << 32) })
+
+/*
+ * clone_pgd_range(pgd_t *dst, pgd_t *src, int count);
+ *
+ *  dst - pointer to pgd range anwhere on a pgd page
+ *  src - ""
+ *  count - the number of pgds to copy.
+ *
+ * dst and src can be on the same page, but the range must not overlap,
+ * and must not cross a page boundary.
+ */
+static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
+{
+       memcpy(dst, src, count * sizeof(pgd_t));
+}
+
+/*
+ * Conversion functions: convert a page and protection to a page entry,
+ * and a page entry and page directory to the page they refer to.
+ */
+
+#define mk_pte(page, pgprot)	pfn_pte(page_to_pfn(page), (pgprot))
+
+/*
+ * If we are doing an mprotect(), just accept the new vma->vm_page_prot
+ * value and combine it with the PFN from the old PTE to get a new PTE.
+ */
+static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+{
+	return pfn_pte(hv_pte_get_pfn(pte), newprot);
+}
+
+/*
+ * The pgd page can be thought of an array like this: pgd_t[PTRS_PER_PGD]
+ *
+ * This macro returns the index of the entry in the pgd page which would
+ * control the given virtual address.
+ */
+#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
+
+/*
+ * pgd_offset() returns a (pgd_t *)
+ * pgd_index() is used get the offset into the pgd page's array of pgd_t's.
+ */
+#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address))
+
+/*
+ * A shortcut which implies the use of the kernel's pgd, instead
+ * of a process's.
+ */
+#define pgd_offset_k(address) pgd_offset(&init_mm, address)
+
+#if defined(CONFIG_HIGHPTE)
+extern pte_t *_pte_offset_map(pmd_t *, unsigned long address, enum km_type);
+#define pte_offset_map(dir, address) \
+	_pte_offset_map(dir, address, KM_PTE0)
+#define pte_offset_map_nested(dir, address) \
+	_pte_offset_map(dir, address, KM_PTE1)
+#define pte_unmap(pte) kunmap_atomic(pte, KM_PTE0)
+#define pte_unmap_nested(pte) kunmap_atomic(pte, KM_PTE1)
+#else
+#define pte_offset_map(dir, address) pte_offset_kernel(dir, address)
+#define pte_offset_map_nested(dir, address) pte_offset_map(dir, address)
+#define pte_unmap(pte) do { } while (0)
+#define pte_unmap_nested(pte) do { } while (0)
+#endif
+
+/* Clear a non-executable kernel PTE and flush it from the TLB. */
+#define kpte_clear_flush(ptep, vaddr)		\
+do {						\
+	pte_clear(&init_mm, (vaddr), (ptep));	\
+	local_flush_tlb_page(FLUSH_NONEXEC, (vaddr), PAGE_SIZE); \
+} while (0)
+
+/*
+ * The kernel page tables contain what we need, and we flush when we
+ * change specific page table entries.
+ */
+#define update_mmu_cache(vma, address, pte) do { } while (0)
+
+#ifdef CONFIG_FLATMEM
+#define kern_addr_valid(addr)	(1)
+#endif /* CONFIG_FLATMEM */
+
+#define io_remap_pfn_range(vma, vaddr, pfn, size, prot)		\
+		remap_pfn_range(vma, vaddr, pfn, size, prot)
+
+extern void vmalloc_sync_all(void);
+
+#endif /* !__ASSEMBLY__ */
+
+#ifdef __tilegx__
+#include <asm/pgtable_64.h>
+#else
+#include <asm/pgtable_32.h>
+#endif
+
+#ifndef __ASSEMBLY__
+
+static inline int pmd_none(pmd_t pmd)
+{
+	/*
+	 * Only check low word on 32-bit platforms, since it might be
+	 * out of sync with upper half.
+	 */
+	return (unsigned long)pmd_val(pmd) == 0;
+}
+
+static inline int pmd_present(pmd_t pmd)
+{
+	return pmd_val(pmd) & _PAGE_PRESENT;
+}
+
+static inline int pmd_bad(pmd_t pmd)
+{
+	return ((pmd_val(pmd) & _PAGE_ALL) != _PAGE_TABLE);
+}
+
+static inline unsigned long pages_to_mb(unsigned long npg)
+{
+	return npg >> (20 - PAGE_SHIFT);
+}
+
+/*
+ * The pmd can be thought of an array like this: pmd_t[PTRS_PER_PMD]
+ *
+ * This function returns the index of the entry in the pmd which would
+ * control the given virtual address.
+ */
+static inline unsigned long pmd_index(unsigned long address)
+{
+	return (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1);
+}
+
+/*
+ * A given kernel pmd_t maps to a specific virtual address (either a
+ * kernel huge page or a kernel pte_t table).  Since kernel pte_t
+ * tables can be aligned at sub-page granularity, this function can
+ * return non-page-aligned pointers, despite its name.
+ */
+static inline unsigned long pmd_page_vaddr(pmd_t pmd)
+{
+	phys_addr_t pa =
+		(phys_addr_t)pmd_ptfn(pmd) << HV_LOG2_PAGE_TABLE_ALIGN;
+	return (unsigned long)__va(pa);
+}
+
+/*
+ * A pmd_t points to the base of a huge page or to a pte_t array.
+ * If a pte_t array, since we can have multiple per page, we don't
+ * have a one-to-one mapping of pmd_t's to pages.  However, this is
+ * OK for pte_lockptr(), since we just end up with potentially one
+ * lock being used for several pte_t arrays.
+ */
+#define pmd_page(pmd) pfn_to_page(HV_PTFN_TO_PFN(pmd_ptfn(pmd)))
+
+/*
+ * The pte page can be thought of an array like this: pte_t[PTRS_PER_PTE]
+ *
+ * This macro returns the index of the entry in the pte page which would
+ * control the given virtual address.
+ */
+static inline unsigned long pte_index(unsigned long address)
+{
+	return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
+}
+
+static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address)
+{
+       return (pte_t *)pmd_page_vaddr(*pmd) + pte_index(address);
+}
+
+static inline int pmd_huge_page(pmd_t pmd)
+{
+	return pmd_val(pmd) & _PAGE_HUGE_PAGE;
+}
+
+#include <asm-generic/pgtable.h>
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _ASM_TILE_PGTABLE_H */
diff --git a/arch/tile/include/asm/pgtable_32.h b/arch/tile/include/asm/pgtable_32.h
new file mode 100644
index 0000000..b935fb2
--- /dev/null
+++ b/arch/tile/include/asm/pgtable_32.h
@@ -0,0 +1,117 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ */
+
+#ifndef _ASM_TILE_PGTABLE_32_H
+#define _ASM_TILE_PGTABLE_32_H
+
+/*
+ * The level-1 index is defined by the huge page size.  A PGD is composed
+ * of PTRS_PER_PGD pgd_t's and is the top level of the page table.
+ */
+#define PGDIR_SHIFT	HV_LOG2_PAGE_SIZE_LARGE
+#define PGDIR_SIZE	HV_PAGE_SIZE_LARGE
+#define PGDIR_MASK	(~(PGDIR_SIZE-1))
+#define PTRS_PER_PGD	(1 << (32 - PGDIR_SHIFT))
+
+/*
+ * The level-2 index is defined by the difference between the huge
+ * page size and the normal page size.  A PTE is composed of
+ * PTRS_PER_PTE pte_t's and is the bottom level of the page table.
+ * Note that the hypervisor docs use PTE for what we call pte_t, so
+ * this nomenclature is somewhat confusing.
+ */
+#define PTRS_PER_PTE (1 << (HV_LOG2_PAGE_SIZE_LARGE - HV_LOG2_PAGE_SIZE_SMALL))
+
+#ifndef __ASSEMBLY__
+
+/*
+ * Right now we initialize only a single pte table. It can be extended
+ * easily, subsequent pte tables have to be allocated in one physical
+ * chunk of RAM.
+ *
+ * HOWEVER, if we are using an allocation scheme with slop after the
+ * end of the page table (e.g. where our L2 page tables are 2KB but
+ * our pages are 64KB and we are allocating via the page allocator)
+ * we can't extend it easily.
+ */
+#define LAST_PKMAP PTRS_PER_PTE
+
+#define PKMAP_BASE   ((FIXADDR_BOOT_START - PAGE_SIZE*LAST_PKMAP) & PGDIR_MASK)
+
+#ifdef CONFIG_HIGHMEM
+# define __VMAPPING_END	(PKMAP_BASE & ~(HPAGE_SIZE-1))
+#else
+# define __VMAPPING_END	(FIXADDR_START & ~(HPAGE_SIZE-1))
+#endif
+
+#ifdef CONFIG_HUGEVMAP
+#define HUGE_VMAP_END	__VMAPPING_END
+#define HUGE_VMAP_BASE	(HUGE_VMAP_END - CONFIG_NR_HUGE_VMAPS * HPAGE_SIZE)
+#define _VMALLOC_END	HUGE_VMAP_BASE
+#else
+#define _VMALLOC_END	__VMAPPING_END
+#endif
+
+/*
+ * Align the vmalloc area to an L2 page table, and leave a guard page
+ * at the beginning and end.  The vmalloc code also puts in an internal
+ * guard page between each allocation.
+ */
+#define VMALLOC_END	(_VMALLOC_END - PAGE_SIZE)
+extern unsigned long VMALLOC_RESERVE /* = CONFIG_VMALLOC_RESERVE */;
+#define _VMALLOC_START	(_VMALLOC_END - VMALLOC_RESERVE)
+#define VMALLOC_START	(_VMALLOC_START + PAGE_SIZE)
+
+/* This is the maximum possible amount of lowmem. */
+#define MAXMEM		(_VMALLOC_START - PAGE_OFFSET)
+
+/* We have no pmd or pud since we are strictly a two-level page table */
+#include <asm-generic/pgtable-nopmd.h>
+
+/* We don't define any pgds for these addresses. */
+static inline int pgd_addr_invalid(unsigned long addr)
+{
+	return addr >= MEM_HV_INTRPT;
+}
+
+/*
+ * Provide versions of these routines that can be used safely when
+ * the hypervisor may be asynchronously modifying dirty/accessed bits.
+ */
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+#define __HAVE_ARCH_PTEP_SET_WRPROTECT
+
+extern int ptep_test_and_clear_young(struct vm_area_struct *,
+				     unsigned long addr, pte_t *);
+extern void ptep_set_wrprotect(struct mm_struct *,
+			       unsigned long addr, pte_t *);
+
+/* Create a pmd from a PTFN. */
+static inline pmd_t ptfn_pmd(unsigned long ptfn, pgprot_t prot)
+{
+	return (pmd_t){ { hv_pte_set_ptfn(prot, ptfn) } };
+}
+
+/* Return the page-table frame number (ptfn) that a pmd_t points at. */
+#define pmd_ptfn(pmd) hv_pte_get_ptfn((pmd).pud.pgd)
+
+static inline void pmd_clear(pmd_t *pmdp)
+{
+	__pte_clear(&pmdp->pud.pgd);
+}
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_TILE_PGTABLE_32_H */
diff --git a/arch/tile/include/asm/poll.h b/arch/tile/include/asm/poll.h
new file mode 100644
index 0000000..c98509d
--- /dev/null
+++ b/arch/tile/include/asm/poll.h
@@ -0,0 +1 @@
+#include <asm-generic/poll.h>
diff --git a/arch/tile/include/asm/posix_types.h b/arch/tile/include/asm/posix_types.h
new file mode 100644
index 0000000..22cae62
--- /dev/null
+++ b/arch/tile/include/asm/posix_types.h
@@ -0,0 +1 @@
+#include <asm-generic/posix_types.h>
diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h
new file mode 100644
index 0000000..96c50d2
--- /dev/null
+++ b/arch/tile/include/asm/processor.h
@@ -0,0 +1,339 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_PROCESSOR_H
+#define _ASM_TILE_PROCESSOR_H
+
+#ifndef __ASSEMBLY__
+
+/*
+ * NOTE: we don't include <linux/ptrace.h> or <linux/percpu.h> as one
+ * normally would, due to #include dependencies.
+ */
+#include <asm/ptrace.h>
+#include <asm/percpu.h>
+
+#include <arch/chip.h>
+#include <arch/spr_def.h>
+
+struct task_struct;
+struct thread_struct;
+struct list_head;
+
+typedef struct {
+	unsigned long seg;
+} mm_segment_t;
+
+/*
+ * Default implementation of macro that returns current
+ * instruction pointer ("program counter").
+ */
+void *current_text_addr(void);
+
+#if CHIP_HAS_TILE_DMA()
+/* Capture the state of a suspended DMA. */
+struct tile_dma_state {
+	int enabled;
+	unsigned long src;
+	unsigned long dest;
+	unsigned long strides;
+	unsigned long chunk_size;
+	unsigned long src_chunk;
+	unsigned long dest_chunk;
+	unsigned long byte;
+	unsigned long status;
+};
+
+/*
+ * A mask of the DMA status register for selecting only the 'running'
+ * and 'done' bits.
+ */
+#define DMA_STATUS_MASK \
+  (SPR_DMA_STATUS__RUNNING_MASK | SPR_DMA_STATUS__DONE_MASK)
+#endif
+
+/*
+ * Track asynchronous TLB events (faults and access violations)
+ * that occur while we are in kernel mode from DMA or the SN processor.
+ */
+struct async_tlb {
+	short fault_num;         /* original fault number; 0 if none */
+	char is_fault;           /* was it a fault (vs an access violation) */
+	char is_write;           /* for fault: was it caused by a write? */
+	unsigned long address;   /* what address faulted? */
+};
+
+
+struct thread_struct {
+	/* kernel stack pointer */
+	unsigned long  ksp;
+	/* kernel PC */
+	unsigned long  pc;
+	/* starting user stack pointer (for page migration) */
+	unsigned long  usp0;
+	/* pid of process that created this one */
+	pid_t creator_pid;
+#if CHIP_HAS_TILE_DMA()
+	/* DMA info for suspended threads (byte == 0 means no DMA state) */
+	struct tile_dma_state tile_dma_state;
+#endif
+	/* User EX_CONTEXT registers */
+	unsigned long ex_context[2];
+	/* User SYSTEM_SAVE registers */
+	unsigned long system_save[4];
+	/* User interrupt mask */
+	unsigned long long interrupt_mask;
+	/* User interrupt-control 0 state */
+	unsigned long intctrl_0;
+#if CHIP_HAS_PROC_STATUS_SPR()
+	/* Any other miscellaneous processor state bits */
+	unsigned long proc_status;
+#endif
+#if CHIP_HAS_TILE_DMA()
+	/* Async DMA TLB fault information */
+	struct async_tlb dma_async_tlb;
+#endif
+#if CHIP_HAS_SN_PROC()
+	/* Was static network processor when we were switched out? */
+	int sn_proc_running;
+	/* Async SNI TLB fault information */
+	struct async_tlb sn_async_tlb;
+#endif
+};
+
+#endif /* !__ASSEMBLY__ */
+
+/*
+ * Start with "sp" this many bytes below the top of the kernel stack.
+ * This preserves the invariant that a called function may write to *sp.
+ */
+#define STACK_TOP_DELTA 8
+
+/*
+ * When entering the kernel via a fault, start with the top of the
+ * pt_regs structure this many bytes below the top of the page.
+ * This aligns the pt_regs structure optimally for cache-line access.
+ */
+#ifdef __tilegx__
+#define KSTK_PTREGS_GAP  48
+#else
+#define KSTK_PTREGS_GAP  56
+#endif
+
+#ifndef __ASSEMBLY__
+
+#ifdef __tilegx__
+#define TASK_SIZE_MAX		(MEM_LOW_END + 1)
+#else
+#define TASK_SIZE_MAX		PAGE_OFFSET
+#endif
+
+/* TASK_SIZE and related variables are always checked in "current" context. */
+#ifdef CONFIG_COMPAT
+#define COMPAT_TASK_SIZE	(1UL << 31)
+#define TASK_SIZE		((current_thread_info()->status & TS_COMPAT) ?\
+				 COMPAT_TASK_SIZE : TASK_SIZE_MAX)
+#else
+#define TASK_SIZE		TASK_SIZE_MAX
+#endif
+
+/* We provide a minimal "vdso" a la x86; just the sigreturn code for now. */
+#define VDSO_BASE		(TASK_SIZE - PAGE_SIZE)
+
+#define STACK_TOP		VDSO_BASE
+
+/* STACK_TOP_MAX is used temporarily in execve and should not check COMPAT. */
+#define STACK_TOP_MAX		TASK_SIZE_MAX
+
+/*
+ * This decides where the kernel will search for a free chunk of vm
+ * space during mmap's, if it is using bottom-up mapping.
+ */
+#define TASK_UNMAPPED_BASE	(PAGE_ALIGN(TASK_SIZE / 3))
+
+#define HAVE_ARCH_PICK_MMAP_LAYOUT
+
+#define INIT_THREAD {                                                   \
+	.ksp = (unsigned long)init_stack + THREAD_SIZE - STACK_TOP_DELTA, \
+	.interrupt_mask = -1ULL                                         \
+}
+
+/* Kernel stack top for the task that first boots on this cpu. */
+DECLARE_PER_CPU(unsigned long, boot_sp);
+
+/* PC to boot from on this cpu. */
+DECLARE_PER_CPU(unsigned long, boot_pc);
+
+/* Do necessary setup to start up a newly executed thread. */
+static inline void start_thread(struct pt_regs *regs,
+				unsigned long pc, unsigned long usp)
+{
+	regs->pc = pc;
+	regs->sp = usp;
+}
+
+/* Free all resources held by a thread. */
+static inline void release_thread(struct task_struct *dead_task)
+{
+	/* Nothing for now */
+}
+
+/* Prepare to copy thread state - unlazy all lazy status. */
+#define prepare_to_copy(tsk)	do { } while (0)
+
+extern int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
+
+/* Helper routines for setting home cache modes at exec() time. */
+
+
+/*
+ * Return saved (kernel) PC of a blocked thread.
+ * Only used in a printk() in kernel/sched.c, so don't work too hard.
+ */
+#define thread_saved_pc(t)   ((t)->thread.pc)
+
+unsigned long get_wchan(struct task_struct *p);
+
+/* Return initial ksp value for given task. */
+#define task_ksp0(task) ((unsigned long)(task)->stack + THREAD_SIZE)
+
+/* Return some info about the user process TASK. */
+#define KSTK_TOP(task)	(task_ksp0(task) - STACK_TOP_DELTA)
+#define task_pt_regs(task) \
+  ((struct pt_regs *)(task_ksp0(task) - KSTK_PTREGS_GAP) - 1)
+#define task_sp(task)	(task_pt_regs(task)->sp)
+#define task_pc(task)	(task_pt_regs(task)->pc)
+/* Aliases for pc and sp (used in fs/proc/array.c) */
+#define KSTK_EIP(task)	task_pc(task)
+#define KSTK_ESP(task)	task_sp(task)
+
+/* Standard format for printing registers and other word-size data. */
+#ifdef __tilegx__
+# define REGFMT "0x%016lx"
+#else
+# define REGFMT "0x%08lx"
+#endif
+
+/*
+ * Do some slow action (e.g. read a slow SPR).
+ * Note that this must also have compiler-barrier semantics since
+ * it may be used in a busy loop reading memory.
+ */
+static inline void cpu_relax(void)
+{
+	__insn_mfspr(SPR_PASS);
+	barrier();
+}
+
+struct siginfo;
+extern void arch_coredump_signal(struct siginfo *, struct pt_regs *);
+#define arch_coredump_signal arch_coredump_signal
+
+/* Provide information about the chip model. */
+extern char chip_model[64];
+
+/* Data on which physical memory controller corresponds to which NUMA node. */
+extern int node_controller[];
+
+
+/* Do we dump information to the console when a user application crashes? */
+extern int show_crashinfo;
+
+#if CHIP_HAS_CBOX_HOME_MAP()
+/* Does the heap allocator return hash-for-home pages by default? */
+extern int hash_default;
+
+/* Should kernel stack pages be hash-for-home? */
+extern int kstack_hash;
+#else
+#define hash_default 0
+#define kstack_hash 0
+#endif
+
+/* Are we using huge pages in the TLB for kernel data? */
+extern int kdata_huge;
+
+/*
+ * Note that with OLOC the prefetch will return an unused read word to
+ * the issuing tile, which will cause some MDN traffic.  Benchmarking
+ * should be done to see whether this outweighs prefetching.
+ */
+#define ARCH_HAS_PREFETCH
+#define ARCH_HAS_PREFETCHW
+#define ARCH_HAS_SPINLOCK_PREFETCH
+
+#define prefetch(ptr) __builtin_prefetch((ptr), 0, 3)
+#define prefetchw(ptr) __builtin_prefetch((ptr), 1, 3)
+
+#ifdef CONFIG_SMP
+#define spin_lock_prefetch(ptr) prefetchw(ptr)
+#else
+/* Nothing to prefetch. */
+#define spin_lock_prefetch(lock)	do { } while (0)
+#endif
+
+#else /* __ASSEMBLY__ */
+
+/* Do some slow action (e.g. read a slow SPR). */
+#define CPU_RELAX       mfspr zero, SPR_PASS
+
+#endif /* !__ASSEMBLY__ */
+
+/* Assembly code assumes that the PL is in the low bits. */
+#if SPR_EX_CONTEXT_1_1__PL_SHIFT != 0
+# error Fix assembly assumptions about PL
+#endif
+
+/* We sometimes use these macros for EX_CONTEXT_0_1 as well. */
+#if SPR_EX_CONTEXT_1_1__PL_SHIFT != SPR_EX_CONTEXT_0_1__PL_SHIFT || \
+    SPR_EX_CONTEXT_1_1__PL_RMASK != SPR_EX_CONTEXT_0_1__PL_RMASK || \
+    SPR_EX_CONTEXT_1_1__ICS_SHIFT != SPR_EX_CONTEXT_0_1__ICS_SHIFT || \
+    SPR_EX_CONTEXT_1_1__ICS_RMASK != SPR_EX_CONTEXT_0_1__ICS_RMASK
+# error Fix assumptions that EX1 macros work for both PL0 and PL1
+#endif
+
+/* Allow pulling apart and recombining the PL and ICS bits in EX_CONTEXT. */
+#define EX1_PL(ex1) \
+  (((ex1) >> SPR_EX_CONTEXT_1_1__PL_SHIFT) & SPR_EX_CONTEXT_1_1__PL_RMASK)
+#define EX1_ICS(ex1) \
+  (((ex1) >> SPR_EX_CONTEXT_1_1__ICS_SHIFT) & SPR_EX_CONTEXT_1_1__ICS_RMASK)
+#define PL_ICS_EX1(pl, ics) \
+  (((pl) << SPR_EX_CONTEXT_1_1__PL_SHIFT) | \
+   ((ics) << SPR_EX_CONTEXT_1_1__ICS_SHIFT))
+
+/*
+ * Provide symbolic constants for PLs.
+ * Note that assembly code assumes that USER_PL is zero.
+ */
+#define USER_PL 0
+#define KERNEL_PL 1
+
+/* SYSTEM_SAVE_1_0 holds the current cpu number ORed with ksp0. */
+#define CPU_LOG_MASK_VALUE 12
+#define CPU_MASK_VALUE ((1 << CPU_LOG_MASK_VALUE) - 1)
+#if CONFIG_NR_CPUS > CPU_MASK_VALUE
+# error Too many cpus!
+#endif
+#define raw_smp_processor_id() \
+	((int)__insn_mfspr(SPR_SYSTEM_SAVE_1_0) & CPU_MASK_VALUE)
+#define get_current_ksp0() \
+	(__insn_mfspr(SPR_SYSTEM_SAVE_1_0) & ~CPU_MASK_VALUE)
+#define next_current_ksp0(task) ({ \
+	unsigned long __ksp0 = task_ksp0(task); \
+	int __cpu = raw_smp_processor_id(); \
+	BUG_ON(__ksp0 & CPU_MASK_VALUE); \
+	__ksp0 | __cpu; \
+})
+
+#endif /* _ASM_TILE_PROCESSOR_H */
diff --git a/arch/tile/include/asm/ptrace.h b/arch/tile/include/asm/ptrace.h
new file mode 100644
index 0000000..4d1d995
--- /dev/null
+++ b/arch/tile/include/asm/ptrace.h
@@ -0,0 +1,163 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_PTRACE_H
+#define _ASM_TILE_PTRACE_H
+
+#include <arch/chip.h>
+#include <arch/abi.h>
+
+/* These must match struct pt_regs, below. */
+#if CHIP_WORD_SIZE() == 32
+#define PTREGS_OFFSET_REG(n)    ((n)*4)
+#else
+#define PTREGS_OFFSET_REG(n)    ((n)*8)
+#endif
+#define PTREGS_OFFSET_BASE      0
+#define PTREGS_OFFSET_TP        PTREGS_OFFSET_REG(53)
+#define PTREGS_OFFSET_SP        PTREGS_OFFSET_REG(54)
+#define PTREGS_OFFSET_LR        PTREGS_OFFSET_REG(55)
+#define PTREGS_NR_GPRS          56
+#define PTREGS_OFFSET_PC        PTREGS_OFFSET_REG(56)
+#define PTREGS_OFFSET_EX1       PTREGS_OFFSET_REG(57)
+#define PTREGS_OFFSET_FAULTNUM  PTREGS_OFFSET_REG(58)
+#define PTREGS_OFFSET_ORIG_R0   PTREGS_OFFSET_REG(59)
+#define PTREGS_OFFSET_FLAGS     PTREGS_OFFSET_REG(60)
+#if CHIP_HAS_CMPEXCH()
+#define PTREGS_OFFSET_CMPEXCH   PTREGS_OFFSET_REG(61)
+#endif
+#define PTREGS_SIZE             PTREGS_OFFSET_REG(64)
+
+#ifndef __ASSEMBLY__
+
+#ifdef __KERNEL__
+/* Benefit from consistent use of "long" on all chips. */
+typedef unsigned long pt_reg_t;
+#else
+/* Provide appropriate length type to userspace regardless of -m32/-m64. */
+typedef uint_reg_t pt_reg_t;
+#endif
+
+/*
+ * This struct defines the way the registers are stored on the stack during a
+ * system call/exception.  It should be a multiple of 8 bytes to preserve
+ * normal stack alignment rules.
+ *
+ * Must track <sys/ucontext.h> and <sys/procfs.h>
+ */
+struct pt_regs {
+	/* Saved main processor registers; 56..63 are special. */
+	/* tp, sp, and lr must immediately follow regs[] for aliasing. */
+	pt_reg_t regs[53];
+	pt_reg_t tp;		/* aliases regs[TREG_TP] */
+	pt_reg_t sp;		/* aliases regs[TREG_SP] */
+	pt_reg_t lr;		/* aliases regs[TREG_LR] */
+
+	/* Saved special registers. */
+	pt_reg_t pc;		/* stored in EX_CONTEXT_1_0 */
+	pt_reg_t ex1;		/* stored in EX_CONTEXT_1_1 (PL and ICS bit) */
+	pt_reg_t faultnum;	/* fault number (INT_SWINT_1 for syscall) */
+	pt_reg_t orig_r0;	/* r0 at syscall entry, else zero */
+	pt_reg_t flags;		/* flags (see below) */
+#if !CHIP_HAS_CMPEXCH()
+	pt_reg_t pad[3];
+#else
+	pt_reg_t cmpexch;	/* value of CMPEXCH_VALUE SPR at interrupt */
+	pt_reg_t pad[2];
+#endif
+};
+
+#endif /* __ASSEMBLY__ */
+
+/* Flag bits in pt_regs.flags */
+#define PT_FLAGS_DISABLE_IRQ    1  /* on return to kernel, disable irqs */
+#define PT_FLAGS_CALLER_SAVES   2  /* caller-save registers are valid */
+#define PT_FLAGS_RESTORE_REGS   4  /* restore callee-save regs on return */
+
+#define PTRACE_GETREGS		12
+#define PTRACE_SETREGS		13
+#define PTRACE_GETFPREGS	14
+#define PTRACE_SETFPREGS	15
+
+/* Support TILE-specific ptrace options, with events starting at 16. */
+#define PTRACE_O_TRACEMIGRATE	0x00010000
+#define PTRACE_EVENT_MIGRATE	16
+#ifdef __KERNEL__
+#define PTRACE_O_MASK_TILE	(PTRACE_O_TRACEMIGRATE)
+#define PT_TRACE_MIGRATE	0x00080000
+#define PT_TRACE_MASK_TILE	(PT_TRACE_MIGRATE)
+#endif
+
+#ifdef __KERNEL__
+
+#ifndef __ASSEMBLY__
+
+#define instruction_pointer(regs) ((regs)->pc)
+#define profile_pc(regs) instruction_pointer(regs)
+
+/* Does the process account for user or for system time? */
+#define user_mode(regs) (EX1_PL((regs)->ex1) == USER_PL)
+
+/* Fill in a struct pt_regs with the current kernel registers. */
+struct pt_regs *get_pt_regs(struct pt_regs *);
+
+extern void show_regs(struct pt_regs *);
+
+#define arch_has_single_step()	(1)
+
+/*
+ * A structure for all single-stepper state.
+ *
+ * Also update defines in assembler section if it changes
+ */
+struct single_step_state {
+	/* the page to which we will write hacked-up bundles */
+	void *buffer;
+
+	union {
+		int flags;
+		struct {
+			unsigned long is_enabled:1, update:1, update_reg:6;
+		};
+	};
+
+	unsigned long orig_pc;		/* the original PC */
+	unsigned long next_pc;		/* return PC if no branch (PC + 1) */
+	unsigned long branch_next_pc;	/* return PC if we did branch/jump */
+	unsigned long update_value;	/* value to restore to update_target */
+};
+
+/* Single-step the instruction at regs->pc */
+extern void single_step_once(struct pt_regs *regs);
+
+struct task_struct;
+
+extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs,
+			 int error_code);
+
+#ifdef __tilegx__
+/* We need this since sigval_t has a user pointer in it, for GETSIGINFO etc. */
+#define __ARCH_WANT_COMPAT_SYS_PTRACE
+#endif
+
+#endif /* !__ASSEMBLY__ */
+
+#define SINGLESTEP_STATE_MASK_IS_ENABLED      0x1
+#define SINGLESTEP_STATE_MASK_UPDATE          0x2
+#define SINGLESTEP_STATE_TARGET_LB              2
+#define SINGLESTEP_STATE_TARGET_UB              7
+
+#endif /* !__KERNEL__ */
+
+#endif /* _ASM_TILE_PTRACE_H */
diff --git a/arch/tile/include/asm/resource.h b/arch/tile/include/asm/resource.h
new file mode 100644
index 0000000..04bc4db
--- /dev/null
+++ b/arch/tile/include/asm/resource.h
@@ -0,0 +1 @@
+#include <asm-generic/resource.h>
diff --git a/arch/tile/include/asm/scatterlist.h b/arch/tile/include/asm/scatterlist.h
new file mode 100644
index 0000000..c560424
--- /dev/null
+++ b/arch/tile/include/asm/scatterlist.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_SCATTERLIST_H
+#define _ASM_TILE_SCATTERLIST_H
+
+#define ISA_DMA_THRESHOLD	(~0UL)
+
+#include <asm-generic/scatterlist.h>
+
+#endif /* _ASM_TILE_SCATTERLIST_H */
diff --git a/arch/tile/include/asm/sections.h b/arch/tile/include/asm/sections.h
new file mode 100644
index 0000000..6c11149
--- /dev/null
+++ b/arch/tile/include/asm/sections.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_SECTIONS_H
+#define _ASM_TILE_SECTIONS_H
+
+#define arch_is_kernel_data arch_is_kernel_data
+
+#include <asm-generic/sections.h>
+
+/* Text and data are at different areas in the kernel VA space. */
+extern char _sinitdata[], _einitdata[];
+
+/* Write-once data is writable only till the end of initialization. */
+extern char __w1data_begin[], __w1data_end[];
+
+extern char __feedback_section_start[], __feedback_section_end[];
+
+/* Handle the discontiguity between _sdata and _stext. */
+static inline int arch_is_kernel_data(unsigned long addr)
+{
+	return addr >= (unsigned long)_sdata &&
+		addr < (unsigned long)_end;
+}
+
+#endif /* _ASM_TILE_SECTIONS_H */
diff --git a/arch/tile/include/asm/sembuf.h b/arch/tile/include/asm/sembuf.h
new file mode 100644
index 0000000..7673b83
--- /dev/null
+++ b/arch/tile/include/asm/sembuf.h
@@ -0,0 +1 @@
+#include <asm-generic/sembuf.h>
diff --git a/arch/tile/include/asm/setup.h b/arch/tile/include/asm/setup.h
new file mode 100644
index 0000000..823ddd4
--- /dev/null
+++ b/arch/tile/include/asm/setup.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_SETUP_H
+#define _ASM_TILE_SETUP_H
+
+#include <linux/pfn.h>
+#include <linux/init.h>
+
+/*
+ * Reserved space for vmalloc and iomap - defined in asm/page.h
+ */
+#define MAXMEM_PFN	PFN_DOWN(MAXMEM)
+
+#define COMMAND_LINE_SIZE	2048
+
+void early_panic(const char *fmt, ...);
+void warn_early_printk(void);
+void __init disable_early_printk(void);
+
+#endif /* _ASM_TILE_SETUP_H */
diff --git a/arch/tile/include/asm/shmbuf.h b/arch/tile/include/asm/shmbuf.h
new file mode 100644
index 0000000..83c05fc
--- /dev/null
+++ b/arch/tile/include/asm/shmbuf.h
@@ -0,0 +1 @@
+#include <asm-generic/shmbuf.h>
diff --git a/arch/tile/include/asm/shmparam.h b/arch/tile/include/asm/shmparam.h
new file mode 100644
index 0000000..93f30de
--- /dev/null
+++ b/arch/tile/include/asm/shmparam.h
@@ -0,0 +1 @@
+#include <asm-generic/shmparam.h>
diff --git a/arch/tile/include/asm/sigcontext.h b/arch/tile/include/asm/sigcontext.h
new file mode 100644
index 0000000..7cd7672
--- /dev/null
+++ b/arch/tile/include/asm/sigcontext.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_SIGCONTEXT_H
+#define _ASM_TILE_SIGCONTEXT_H
+
+/* NOTE: we can't include <linux/ptrace.h> due to #include dependencies. */
+#include <asm/ptrace.h>
+
+/* Must track <sys/ucontext.h> */
+
+struct sigcontext {
+	struct pt_regs regs;
+};
+
+#endif /* _ASM_TILE_SIGCONTEXT_H */
diff --git a/arch/tile/include/asm/sigframe.h b/arch/tile/include/asm/sigframe.h
new file mode 100644
index 0000000..994d3d3
--- /dev/null
+++ b/arch/tile/include/asm/sigframe.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_SIGFRAME_H
+#define _ASM_TILE_SIGFRAME_H
+
+/* Indicate that syscall return should not examine r0 */
+#define INT_SWINT_1_SIGRETURN (~0)
+
+#ifndef __ASSEMBLY__
+
+#include <arch/abi.h>
+
+struct rt_sigframe {
+	unsigned char save_area[C_ABI_SAVE_AREA_SIZE]; /* caller save area */
+	struct siginfo info;
+	struct ucontext uc;
+};
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _ASM_TILE_SIGFRAME_H */
diff --git a/arch/tile/include/asm/siginfo.h b/arch/tile/include/asm/siginfo.h
new file mode 100644
index 0000000..0c12d1b
--- /dev/null
+++ b/arch/tile/include/asm/siginfo.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_SIGINFO_H
+#define _ASM_TILE_SIGINFO_H
+
+#define __ARCH_SI_TRAPNO
+
+#include <asm-generic/siginfo.h>
+
+/*
+ * Additional Tile-specific SIGILL si_codes
+ */
+#define ILL_DBLFLT	(__SI_FAULT|9)	/* double fault */
+#define ILL_HARDWALL	(__SI_FAULT|10)	/* user networks hardwall violation */
+#undef NSIGILL
+#define NSIGILL		10
+
+#endif /* _ASM_TILE_SIGINFO_H */
diff --git a/arch/tile/include/asm/signal.h b/arch/tile/include/asm/signal.h
new file mode 100644
index 0000000..d20d326
--- /dev/null
+++ b/arch/tile/include/asm/signal.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_SIGNAL_H
+#define _ASM_TILE_SIGNAL_H
+
+/* Do not notify a ptracer when this signal is handled. */
+#define SA_NOPTRACE 0x02000000u
+
+/* Used in earlier Tilera releases, so keeping for binary compatibility. */
+#define SA_RESTORER 0x04000000u
+
+#include <asm-generic/signal.h>
+
+#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
+int restore_sigcontext(struct pt_regs *, struct sigcontext __user *, long *);
+int setup_sigcontext(struct sigcontext __user *, struct pt_regs *);
+#endif
+
+#endif /* _ASM_TILE_SIGNAL_H */
diff --git a/arch/tile/include/asm/smp.h b/arch/tile/include/asm/smp.h
new file mode 100644
index 0000000..da24858
--- /dev/null
+++ b/arch/tile/include/asm/smp.h
@@ -0,0 +1,126 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_SMP_H
+#define _ASM_TILE_SMP_H
+
+#ifdef CONFIG_SMP
+
+#include <asm/processor.h>
+#include <linux/cpumask.h>
+#include <linux/irqreturn.h>
+
+/* Set up this tile to support receiving hypervisor messages */
+void init_messaging(void);
+
+/* Set up this tile to support receiving device interrupts and IPIs. */
+void init_per_tile_IRQs(void);
+
+/* Send a message to processors specified in mask */
+void send_IPI_many(const struct cpumask *mask, int tag);
+
+/* Send a message to all but the sending processor */
+void send_IPI_allbutself(int tag);
+
+/* Send a message to a specific processor */
+void send_IPI_single(int dest, int tag);
+
+/* Process an IPI message */
+void evaluate_message(int tag);
+
+/* Process an IRQ_RESCHEDULE IPI. */
+irqreturn_t handle_reschedule_ipi(int irq, void *token);
+
+/* Boot a secondary cpu */
+void online_secondary(void);
+
+/* Call a function on a specified set of CPUs (may include this one). */
+extern void on_each_cpu_mask(const struct cpumask *mask,
+			     void (*func)(void *), void *info, bool wait);
+
+/* Topology of the supervisor tile grid, and coordinates of boot processor */
+extern HV_Topology smp_topology;
+
+/* Accessors for grid size */
+#define smp_height		(smp_topology.height)
+#define smp_width		(smp_topology.width)
+
+/* Hypervisor message tags sent via the tile send_IPI*() routines. */
+#define MSG_TAG_START_CPU		1
+#define MSG_TAG_STOP_CPU		2
+#define MSG_TAG_CALL_FUNCTION_MANY	3
+#define MSG_TAG_CALL_FUNCTION_SINGLE	4
+
+/* Hook for the generic smp_call_function_many() routine. */
+static inline void arch_send_call_function_ipi_mask(struct cpumask *mask)
+{
+	send_IPI_many(mask, MSG_TAG_CALL_FUNCTION_MANY);
+}
+
+/* Hook for the generic smp_call_function_single() routine. */
+static inline void arch_send_call_function_single_ipi(int cpu)
+{
+	send_IPI_single(cpu, MSG_TAG_CALL_FUNCTION_SINGLE);
+}
+
+/* Print out the boot string describing which cpus were disabled. */
+void print_disabled_cpus(void);
+
+#else /* !CONFIG_SMP */
+
+#define on_each_cpu_mask(mask, func, info, wait)		\
+  do { if (cpumask_test_cpu(0, (mask))) func(info); } while (0)
+
+#define smp_master_cpu		0
+#define smp_height		1
+#define smp_width		1
+
+#endif /* !CONFIG_SMP */
+
+
+/* Which cpus may be used as the lotar in a page table entry. */
+extern struct cpumask cpu_lotar_map;
+#define cpu_is_valid_lotar(cpu) cpumask_test_cpu((cpu), &cpu_lotar_map)
+
+#if CHIP_HAS_CBOX_HOME_MAP()
+/* Which processors are used for hash-for-home mapping */
+extern struct cpumask hash_for_home_map;
+#endif
+
+/* Which cpus can have their cache flushed by hv_flush_remote(). */
+extern struct cpumask cpu_cacheable_map;
+#define cpu_cacheable(cpu) cpumask_test_cpu((cpu), &cpu_cacheable_map)
+
+/* Convert an HV_LOTAR value into a cpu. */
+static inline int hv_lotar_to_cpu(HV_LOTAR lotar)
+{
+	return HV_LOTAR_X(lotar) + (HV_LOTAR_Y(lotar) * smp_width);
+}
+
+/*
+ * Extension of <linux/cpumask.h> functionality when you just want
+ * to express a mask or suppression or inclusion region without
+ * being too concerned about exactly which cpus are valid in that region.
+ */
+int bitmap_parselist_crop(const char *bp, unsigned long *maskp, int nmaskbits);
+
+#define cpulist_parse_crop(buf, dst) \
+			__cpulist_parse_crop((buf), (dst), NR_CPUS)
+static inline int __cpulist_parse_crop(const char *buf, struct cpumask *dstp,
+					int nbits)
+{
+	return bitmap_parselist_crop(buf, cpumask_bits(dstp), nbits);
+}
+
+#endif /* _ASM_TILE_SMP_H */
diff --git a/arch/tile/include/asm/socket.h b/arch/tile/include/asm/socket.h
new file mode 100644
index 0000000..6b71384
--- /dev/null
+++ b/arch/tile/include/asm/socket.h
@@ -0,0 +1 @@
+#include <asm-generic/socket.h>
diff --git a/arch/tile/include/asm/sockios.h b/arch/tile/include/asm/sockios.h
new file mode 100644
index 0000000..def6d47
--- /dev/null
+++ b/arch/tile/include/asm/sockios.h
@@ -0,0 +1 @@
+#include <asm-generic/sockios.h>
diff --git a/arch/tile/include/asm/spinlock.h b/arch/tile/include/asm/spinlock.h
new file mode 100644
index 0000000..1a8bd47
--- /dev/null
+++ b/arch/tile/include/asm/spinlock.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_SPINLOCK_H
+#define _ASM_TILE_SPINLOCK_H
+
+#ifdef __tilegx__
+#include <asm/spinlock_64.h>
+#else
+#include <asm/spinlock_32.h>
+#endif
+
+#endif /* _ASM_TILE_SPINLOCK_H */
diff --git a/arch/tile/include/asm/spinlock_32.h b/arch/tile/include/asm/spinlock_32.h
new file mode 100644
index 0000000..f3a8473
--- /dev/null
+++ b/arch/tile/include/asm/spinlock_32.h
@@ -0,0 +1,200 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ * 32-bit SMP spinlocks.
+ */
+
+#ifndef _ASM_TILE_SPINLOCK_32_H
+#define _ASM_TILE_SPINLOCK_32_H
+
+#include <asm/atomic.h>
+#include <asm/page.h>
+#include <asm/system.h>
+#include <linux/compiler.h>
+
+/*
+ * We only use even ticket numbers so the '1' inserted by a tns is
+ * an unambiguous "ticket is busy" flag.
+ */
+#define TICKET_QUANTUM 2
+
+
+/*
+ * SMP ticket spinlocks, allowing only a single CPU anywhere
+ *
+ * (the type definitions are in asm/spinlock_types.h)
+ */
+static inline int arch_spin_is_locked(arch_spinlock_t *lock)
+{
+	/*
+	 * Note that even if a new ticket is in the process of being
+	 * acquired, so lock->next_ticket is 1, it's still reasonable
+	 * to claim the lock is held, since it will be momentarily
+	 * if not already.  There's no need to wait for a "valid"
+	 * lock->next_ticket to become available.
+	 */
+	return lock->next_ticket != lock->current_ticket;
+}
+
+void arch_spin_lock(arch_spinlock_t *lock);
+
+/* We cannot take an interrupt after getting a ticket, so don't enable them. */
+#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
+
+int arch_spin_trylock(arch_spinlock_t *lock);
+
+static inline void arch_spin_unlock(arch_spinlock_t *lock)
+{
+	/* For efficiency, overlap fetching the old ticket with the wmb(). */
+	int old_ticket = lock->current_ticket;
+	wmb();  /* guarantee anything modified under the lock is visible */
+	lock->current_ticket = old_ticket + TICKET_QUANTUM;
+}
+
+void arch_spin_unlock_wait(arch_spinlock_t *lock);
+
+/*
+ * Read-write spinlocks, allowing multiple readers
+ * but only one writer.
+ *
+ * We use a "tns/store-back" technique on a single word to manage
+ * the lock state, looping around to retry if the tns returns 1.
+ */
+
+/* Internal layout of the word; do not use. */
+#define _WR_NEXT_SHIFT	8
+#define _WR_CURR_SHIFT  16
+#define _WR_WIDTH       8
+#define _RD_COUNT_SHIFT 24
+#define _RD_COUNT_WIDTH 8
+
+/* Internal functions; do not use. */
+void arch_read_lock_slow(arch_rwlock_t *, u32);
+int arch_read_trylock_slow(arch_rwlock_t *);
+void arch_read_unlock_slow(arch_rwlock_t *);
+void arch_write_lock_slow(arch_rwlock_t *, u32);
+void arch_write_unlock_slow(arch_rwlock_t *, u32);
+
+/**
+ * arch_read_can_lock() - would read_trylock() succeed?
+ */
+static inline int arch_read_can_lock(arch_rwlock_t *rwlock)
+{
+	return (rwlock->lock << _RD_COUNT_WIDTH) == 0;
+}
+
+/**
+ * arch_write_can_lock() - would write_trylock() succeed?
+ */
+static inline int arch_write_can_lock(arch_rwlock_t *rwlock)
+{
+	return rwlock->lock == 0;
+}
+
+/**
+ * arch_read_lock() - acquire a read lock.
+ */
+static inline void arch_read_lock(arch_rwlock_t *rwlock)
+{
+	u32 val = __insn_tns((int *)&rwlock->lock);
+	if (unlikely(val << _RD_COUNT_WIDTH)) {
+		arch_read_lock_slow(rwlock, val);
+		return;
+	}
+	rwlock->lock = val + (1 << _RD_COUNT_SHIFT);
+}
+
+/**
+ * arch_read_lock() - acquire a write lock.
+ */
+static inline void arch_write_lock(arch_rwlock_t *rwlock)
+{
+	u32 val = __insn_tns((int *)&rwlock->lock);
+	if (unlikely(val != 0)) {
+		arch_write_lock_slow(rwlock, val);
+		return;
+	}
+	rwlock->lock = 1 << _WR_NEXT_SHIFT;
+}
+
+/**
+ * arch_read_trylock() - try to acquire a read lock.
+ */
+static inline int arch_read_trylock(arch_rwlock_t *rwlock)
+{
+	int locked;
+	u32 val = __insn_tns((int *)&rwlock->lock);
+	if (unlikely(val & 1)) {
+		return arch_read_trylock_slow(rwlock);
+	}
+	locked = (val << _RD_COUNT_WIDTH) == 0;
+	rwlock->lock = val + (locked << _RD_COUNT_SHIFT);
+	return locked;
+}
+
+/**
+ * arch_write_trylock() - try to acquire a write lock.
+ */
+static inline int arch_write_trylock(arch_rwlock_t *rwlock)
+{
+	u32 val = __insn_tns((int *)&rwlock->lock);
+
+	/*
+	 * If a tns is in progress, or there's a waiting or active locker,
+	 * or active readers, we can't take the lock, so give up.
+	 */
+	if (unlikely(val != 0)) {
+		if (!(val & 1))
+			rwlock->lock = val;
+		return 0;
+	}
+
+	/* Set the "next" field to mark it locked. */
+	rwlock->lock = 1 << _WR_NEXT_SHIFT;
+	return 1;
+}
+
+/**
+ * arch_read_unlock() - release a read lock.
+ */
+static inline void arch_read_unlock(arch_rwlock_t *rwlock)
+{
+	u32 val;
+	mb();  /* guarantee anything modified under the lock is visible */
+	val = __insn_tns((int *)&rwlock->lock);
+	if (unlikely(val & 1)) {
+		arch_read_unlock_slow(rwlock);
+		return;
+	}
+	rwlock->lock = val - (1 << _RD_COUNT_SHIFT);
+}
+
+/**
+ * arch_write_unlock() - release a write lock.
+ */
+static inline void arch_write_unlock(arch_rwlock_t *rwlock)
+{
+	u32 val;
+	mb();  /* guarantee anything modified under the lock is visible */
+	val = __insn_tns((int *)&rwlock->lock);
+	if (unlikely(val != (1 << _WR_NEXT_SHIFT))) {
+		arch_write_unlock_slow(rwlock, val);
+		return;
+	}
+	rwlock->lock = 0;
+}
+
+#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
+#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
+
+#endif /* _ASM_TILE_SPINLOCK_32_H */
diff --git a/arch/tile/include/asm/spinlock_types.h b/arch/tile/include/asm/spinlock_types.h
new file mode 100644
index 0000000..a71f59b
--- /dev/null
+++ b/arch/tile/include/asm/spinlock_types.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_SPINLOCK_TYPES_H
+#define _ASM_TILE_SPINLOCK_TYPES_H
+
+#ifndef __LINUX_SPINLOCK_TYPES_H
+# error "please don't include this file directly"
+#endif
+
+#ifdef __tilegx__
+
+/* Low 15 bits are "next"; high 15 bits are "current". */
+typedef struct arch_spinlock {
+	unsigned int lock;
+} arch_spinlock_t;
+
+#define __ARCH_SPIN_LOCK_UNLOCKED	{ 0 }
+
+/* High bit is "writer owns"; low 31 bits are a count of readers. */
+typedef struct arch_rwlock {
+	unsigned int lock;
+} arch_rwlock_t;
+
+#define __ARCH_RW_LOCK_UNLOCKED		{ 0 }
+
+#else
+
+typedef struct arch_spinlock {
+	/* Next ticket number to hand out. */
+	int next_ticket;
+	/* The ticket number that currently owns this lock. */
+	int current_ticket;
+} arch_spinlock_t;
+
+#define __ARCH_SPIN_LOCK_UNLOCKED	{ 0, 0 }
+
+/*
+ * Byte 0 for tns (only the low bit is used), byte 1 for ticket-lock "next",
+ * byte 2 for ticket-lock "current", byte 3 for reader count.
+ */
+typedef struct arch_rwlock {
+	unsigned int lock;
+} arch_rwlock_t;
+
+#define __ARCH_RW_LOCK_UNLOCKED		{ 0 }
+
+#endif
+#endif /* _ASM_TILE_SPINLOCK_TYPES_H */
diff --git a/arch/tile/include/asm/stack.h b/arch/tile/include/asm/stack.h
new file mode 100644
index 0000000..864913b
--- /dev/null
+++ b/arch/tile/include/asm/stack.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_STACK_H
+#define _ASM_TILE_STACK_H
+
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <asm/backtrace.h>
+#include <hv/hypervisor.h>
+
+/* Everything we need to keep track of a backtrace iteration */
+struct KBacktraceIterator {
+	BacktraceIterator it;
+	struct task_struct *task;     /* task we are backtracing */
+	HV_PTE *pgtable;	      /* page table for user space access */
+	int end;		      /* iteration complete. */
+	int new_context;              /* new context is starting */
+	int profile;                  /* profiling, so stop on async intrpt */
+	int verbose;		      /* printk extra info (don't want to
+				       * do this for profiling) */
+	int is_current;               /* backtracing current task */
+};
+
+/* Iteration methods for kernel backtraces */
+
+/*
+ * Initialize a KBacktraceIterator from a task_struct, and optionally from
+ * a set of registers.  If the registers are omitted, the process is
+ * assumed to be descheduled, and registers are read from the process's
+ * thread_struct and stack.  "verbose" means to printk some additional
+ * information about fault handlers as we pass them on the stack.
+ */
+extern void KBacktraceIterator_init(struct KBacktraceIterator *kbt,
+				    struct task_struct *, struct pt_regs *);
+
+/* Initialize iterator based on current stack. */
+extern void KBacktraceIterator_init_current(struct KBacktraceIterator *kbt);
+
+/* No more frames? */
+extern int KBacktraceIterator_end(struct KBacktraceIterator *kbt);
+
+/* Advance to the next frame. */
+extern void KBacktraceIterator_next(struct KBacktraceIterator *kbt);
+
+/*
+ * Dump stack given complete register info. Use only from the
+ * architecture-specific code; show_stack()
+ * and dump_stack() (in entry.S) are architecture-independent entry points.
+ */
+extern void tile_show_stack(struct KBacktraceIterator *, int headers);
+
+/* Dump stack of current process, with registers to seed the backtrace. */
+extern void dump_stack_regs(struct pt_regs *);
+
+
+#endif /* _ASM_TILE_STACK_H */
diff --git a/arch/tile/include/asm/stat.h b/arch/tile/include/asm/stat.h
new file mode 100644
index 0000000..3dc90fa
--- /dev/null
+++ b/arch/tile/include/asm/stat.h
@@ -0,0 +1 @@
+#include <asm-generic/stat.h>
diff --git a/arch/tile/include/asm/statfs.h b/arch/tile/include/asm/statfs.h
new file mode 100644
index 0000000..0b91fe1
--- /dev/null
+++ b/arch/tile/include/asm/statfs.h
@@ -0,0 +1 @@
+#include <asm-generic/statfs.h>
diff --git a/arch/tile/include/asm/string.h b/arch/tile/include/asm/string.h
new file mode 100644
index 0000000..7535cf1
--- /dev/null
+++ b/arch/tile/include/asm/string.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_STRING_H
+#define _ASM_TILE_STRING_H
+
+#define __HAVE_ARCH_MEMCHR
+#define __HAVE_ARCH_MEMSET
+#define __HAVE_ARCH_MEMCPY
+#define __HAVE_ARCH_MEMMOVE
+#define __HAVE_ARCH_STRCHR
+#define __HAVE_ARCH_STRLEN
+
+extern __kernel_size_t strlen(const char *);
+extern char *strchr(const char *s, int c);
+extern void *memchr(const void *s, int c, size_t n);
+extern void *memset(void *, int, __kernel_size_t);
+extern void *memcpy(void *, const void *, __kernel_size_t);
+extern void *memmove(void *, const void *, __kernel_size_t);
+
+#endif /* _ASM_TILE_STRING_H */
diff --git a/arch/tile/include/asm/swab.h b/arch/tile/include/asm/swab.h
new file mode 100644
index 0000000..25c686a
--- /dev/null
+++ b/arch/tile/include/asm/swab.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_SWAB_H
+#define _ASM_TILE_SWAB_H
+
+/* Tile gcc is always >= 4.3.0, so we use __builtin_bswap. */
+#define __arch_swab32(x) __builtin_bswap32(x)
+#define __arch_swab64(x) __builtin_bswap64(x)
+
+/* Use the variant that is natural for the wordsize. */
+#ifdef CONFIG_64BIT
+#define __arch_swab16(x) (__builtin_bswap64(x) >> 48)
+#else
+#define __arch_swab16(x) (__builtin_bswap32(x) >> 16)
+#endif
+
+#endif /* _ASM_TILE_SWAB_H */
diff --git a/arch/tile/include/asm/syscall.h b/arch/tile/include/asm/syscall.h
new file mode 100644
index 0000000..d35e0dc
--- /dev/null
+++ b/arch/tile/include/asm/syscall.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (C) 2008-2009 Red Hat, Inc.  All rights reserved.
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ * See asm-generic/syscall.h for descriptions of what we must do here.
+ */
+
+#ifndef _ASM_TILE_SYSCALL_H
+#define _ASM_TILE_SYSCALL_H
+
+#include <linux/sched.h>
+#include <linux/err.h>
+#include <arch/abi.h>
+
+/*
+ * Only the low 32 bits of orig_r0 are meaningful, so we return int.
+ * This importantly ignores the high bits on 64-bit, so comparisons
+ * sign-extend the low 32 bits.
+ */
+static inline int syscall_get_nr(struct task_struct *t, struct pt_regs *regs)
+{
+	return regs->regs[TREG_SYSCALL_NR];
+}
+
+static inline void syscall_rollback(struct task_struct *task,
+				    struct pt_regs *regs)
+{
+	regs->regs[0] = regs->orig_r0;
+}
+
+static inline long syscall_get_error(struct task_struct *task,
+				     struct pt_regs *regs)
+{
+	unsigned long error = regs->regs[0];
+	return IS_ERR_VALUE(error) ? error : 0;
+}
+
+static inline long syscall_get_return_value(struct task_struct *task,
+					    struct pt_regs *regs)
+{
+	return regs->regs[0];
+}
+
+static inline void syscall_set_return_value(struct task_struct *task,
+					    struct pt_regs *regs,
+					    int error, long val)
+{
+	regs->regs[0] = (long) error ?: val;
+}
+
+static inline void syscall_get_arguments(struct task_struct *task,
+					 struct pt_regs *regs,
+					 unsigned int i, unsigned int n,
+					 unsigned long *args)
+{
+	BUG_ON(i + n > 6);
+	memcpy(args, &regs[i], n * sizeof(args[0]));
+}
+
+static inline void syscall_set_arguments(struct task_struct *task,
+					 struct pt_regs *regs,
+					 unsigned int i, unsigned int n,
+					 const unsigned long *args)
+{
+	BUG_ON(i + n > 6);
+	memcpy(&regs[i], args, n * sizeof(args[0]));
+}
+
+#endif	/* _ASM_TILE_SYSCALL_H */
diff --git a/arch/tile/include/asm/syscalls.h b/arch/tile/include/asm/syscalls.h
new file mode 100644
index 0000000..e1be54d
--- /dev/null
+++ b/arch/tile/include/asm/syscalls.h
@@ -0,0 +1,60 @@
+/*
+ * syscalls.h - Linux syscall interfaces (arch-specific)
+ *
+ * Copyright (c) 2008 Jaswinder Singh Rajput
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_SYSCALLS_H
+#define _ASM_TILE_SYSCALLS_H
+
+#include <linux/compiler.h>
+#include <linux/linkage.h>
+#include <linux/signal.h>
+#include <linux/types.h>
+
+/* kernel/process.c */
+int sys_fork(struct pt_regs *);
+int sys_vfork(struct pt_regs *);
+int sys_clone(unsigned long clone_flags, unsigned long newsp,
+	      int __user *parent_tidptr, int __user *child_tidptr,
+	      struct pt_regs *);
+int sys_execve(char __user *path, char __user *__user *argv,
+	       char __user *__user *envp, struct pt_regs *);
+
+/* kernel/signal.c */
+int sys_sigaltstack(const stack_t __user *, stack_t __user *,
+		    struct pt_regs *);
+long sys_rt_sigreturn(struct pt_regs *);
+int sys_raise_fpe(int code, unsigned long addr, struct pt_regs*);
+
+/* kernel/sys.c */
+ssize_t sys32_readahead(int fd, u32 offset_lo, u32 offset_hi, u32 count);
+long sys32_fadvise64(int fd, u32 offset_lo, u32 offset_hi,
+		     u32 len, int advice);
+int sys32_fadvise64_64(int fd, u32 offset_lo, u32 offset_hi,
+		       u32 len_lo, u32 len_hi, int advice);
+long sys_flush_cache(void);
+long sys_mmap(unsigned long addr, unsigned long len,
+	      unsigned long prot, unsigned long flags,
+	      unsigned long fd, unsigned long offset);
+long sys_mmap2(unsigned long addr, unsigned long len,
+	       unsigned long prot, unsigned long flags,
+	       unsigned long fd, unsigned long offset);
+
+#ifndef __tilegx__
+/* mm/fault.c */
+int sys_cmpxchg_badaddr(unsigned long address, struct pt_regs *);
+#endif
+
+#endif /* _ASM_TILE_SYSCALLS_H */
diff --git a/arch/tile/include/asm/system.h b/arch/tile/include/asm/system.h
new file mode 100644
index 0000000..d6ca7f8
--- /dev/null
+++ b/arch/tile/include/asm/system.h
@@ -0,0 +1,220 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_SYSTEM_H
+#define _ASM_TILE_SYSTEM_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+#include <linux/irqflags.h>
+
+/* NOTE: we can't include <linux/ptrace.h> due to #include dependencies. */
+#include <asm/ptrace.h>
+
+#include <arch/chip.h>
+#include <arch/sim_def.h>
+#include <arch/spr_def.h>
+
+/*
+ * read_barrier_depends - Flush all pending reads that subsequents reads
+ * depend on.
+ *
+ * No data-dependent reads from memory-like regions are ever reordered
+ * over this barrier.  All reads preceding this primitive are guaranteed
+ * to access memory (but not necessarily other CPUs' caches) before any
+ * reads following this primitive that depend on the data return by
+ * any of the preceding reads.  This primitive is much lighter weight than
+ * rmb() on most CPUs, and is never heavier weight than is
+ * rmb().
+ *
+ * These ordering constraints are respected by both the local CPU
+ * and the compiler.
+ *
+ * Ordering is not guaranteed by anything other than these primitives,
+ * not even by data dependencies.  See the documentation for
+ * memory_barrier() for examples and URLs to more information.
+ *
+ * For example, the following code would force ordering (the initial
+ * value of "a" is zero, "b" is one, and "p" is "&a"):
+ *
+ * <programlisting>
+ *	CPU 0				CPU 1
+ *
+ *	b = 2;
+ *	memory_barrier();
+ *	p = &b;				q = p;
+ *					read_barrier_depends();
+ *					d = *q;
+ * </programlisting>
+ *
+ * because the read of "*q" depends on the read of "p" and these
+ * two reads are separated by a read_barrier_depends().  However,
+ * the following code, with the same initial values for "a" and "b":
+ *
+ * <programlisting>
+ *	CPU 0				CPU 1
+ *
+ *	a = 2;
+ *	memory_barrier();
+ *	b = 3;				y = b;
+ *					read_barrier_depends();
+ *					x = a;
+ * </programlisting>
+ *
+ * does not enforce ordering, since there is no data dependency between
+ * the read of "a" and the read of "b".  Therefore, on some CPUs, such
+ * as Alpha, "y" could be set to 3 and "x" to 0.  Use rmb()
+ * in cases like this where there are no data dependencies.
+ */
+
+#define read_barrier_depends()	do { } while (0)
+
+#define __sync()	__insn_mf()
+
+#if CHIP_HAS_SPLIT_CYCLE()
+#define get_cycles_low() __insn_mfspr(SPR_CYCLE_LOW)
+#else
+#define get_cycles_low() __insn_mfspr(SPR_CYCLE)   /* just get all 64 bits */
+#endif
+
+/* Fence to guarantee visibility of stores to incoherent memory. */
+static inline void
+mb_incoherent(void)
+{
+	__insn_mf();
+
+#if !CHIP_HAS_MF_WAITS_FOR_VICTIMS()
+	{
+		int __mb_incoherent(void);
+#if CHIP_HAS_TILE_WRITE_PENDING()
+		const unsigned long WRITE_TIMEOUT_CYCLES = 400;
+		unsigned long start = get_cycles_low();
+		do {
+			if (__insn_mfspr(SPR_TILE_WRITE_PENDING) == 0)
+				return;
+		} while ((get_cycles_low() - start) < WRITE_TIMEOUT_CYCLES);
+#endif /* CHIP_HAS_TILE_WRITE_PENDING() */
+		(void) __mb_incoherent();
+	}
+#endif /* CHIP_HAS_MF_WAITS_FOR_VICTIMS() */
+}
+
+#define fast_wmb()	__sync()
+#define fast_rmb()	__sync()
+#define fast_mb()	__sync()
+#define fast_iob()	mb_incoherent()
+
+#define wmb()		fast_wmb()
+#define rmb()		fast_rmb()
+#define mb()		fast_mb()
+#define iob()		fast_iob()
+
+#ifdef CONFIG_SMP
+#define smp_mb()	mb()
+#define smp_rmb()	rmb()
+#define smp_wmb()	wmb()
+#define smp_read_barrier_depends()	read_barrier_depends()
+#else
+#define smp_mb()	barrier()
+#define smp_rmb()	barrier()
+#define smp_wmb()	barrier()
+#define smp_read_barrier_depends()	do { } while (0)
+#endif
+
+#define set_mb(var, value) \
+	do { var = value; mb(); } while (0)
+
+#include <linux/irqflags.h>
+
+/*
+ * Pause the DMA engine and static network before task switching.
+ */
+#define prepare_arch_switch(next) _prepare_arch_switch(next)
+void _prepare_arch_switch(struct task_struct *next);
+
+
+/*
+ * switch_to(n) should switch tasks to task nr n, first
+ * checking that n isn't the current task, in which case it does nothing.
+ * The number of callee-saved registers saved on the kernel stack
+ * is defined here for use in copy_thread() and must agree with __switch_to().
+ */
+#endif /* !__ASSEMBLY__ */
+#define CALLEE_SAVED_FIRST_REG 30
+#define CALLEE_SAVED_REGS_COUNT 24   /* r30 to r52, plus an empty to align */
+#ifndef __ASSEMBLY__
+struct task_struct;
+#define switch_to(prev, next, last) ((last) = _switch_to((prev), (next)))
+extern struct task_struct *_switch_to(struct task_struct *prev,
+				      struct task_struct *next);
+
+/*
+ * On SMP systems, when the scheduler does migration-cost autodetection,
+ * it needs a way to flush as much of the CPU's caches as possible:
+ *
+ * TODO: fill this in!
+ */
+static inline void sched_cacheflush(void)
+{
+}
+
+#define arch_align_stack(x) (x)
+
+/*
+ * Is the kernel doing fixups of unaligned accesses?  If <0, no kernel
+ * intervention occurs and SIGBUS is delivered with no data address
+ * info.  If 0, the kernel single-steps the instruction to discover
+ * the data address to provide with the SIGBUS.  If 1, the kernel does
+ * a fixup.
+ */
+extern int unaligned_fixup;
+
+/* Is the kernel printing on each unaligned fixup? */
+extern int unaligned_printk;
+
+/* Number of unaligned fixups performed */
+extern unsigned int unaligned_fixup_count;
+
+/* User-level DMA management functions */
+void grant_dma_mpls(void);
+void restrict_dma_mpls(void);
+
+
+/* Invoke the simulator "syscall" mechanism (see arch/tile/kernel/entry.S). */
+extern int _sim_syscall(int syscall_num, ...);
+#define sim_syscall(syscall_num, ...) \
+	_sim_syscall(SIM_CONTROL_SYSCALL + \
+		((syscall_num) << _SIM_CONTROL_OPERATOR_BITS), \
+		## __VA_ARGS__)
+
+/*
+ * Kernel threads can check to see if they need to migrate their
+ * stack whenever they return from a context switch; for user
+ * threads, we defer until they are returning to user-space.
+ */
+#define finish_arch_switch(prev) do {                                     \
+	if (unlikely((prev)->state == TASK_DEAD))                         \
+		__insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_OS_EXIT |       \
+			((prev)->pid << _SIM_CONTROL_OPERATOR_BITS));     \
+	__insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_OS_SWITCH |             \
+		(current->pid << _SIM_CONTROL_OPERATOR_BITS));            \
+	if (current->mm == NULL && !kstack_hash &&                        \
+	    current_thread_info()->homecache_cpu != smp_processor_id())   \
+		homecache_migrate_kthread();                              \
+} while (0)
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _ASM_TILE_SYSTEM_H */
diff --git a/arch/tile/include/asm/termbits.h b/arch/tile/include/asm/termbits.h
new file mode 100644
index 0000000..3935b10
--- /dev/null
+++ b/arch/tile/include/asm/termbits.h
@@ -0,0 +1 @@
+#include <asm-generic/termbits.h>
diff --git a/arch/tile/include/asm/termios.h b/arch/tile/include/asm/termios.h
new file mode 100644
index 0000000..280d78a
--- /dev/null
+++ b/arch/tile/include/asm/termios.h
@@ -0,0 +1 @@
+#include <asm-generic/termios.h>
diff --git a/arch/tile/include/asm/thread_info.h b/arch/tile/include/asm/thread_info.h
new file mode 100644
index 0000000..9024bf3
--- /dev/null
+++ b/arch/tile/include/asm/thread_info.h
@@ -0,0 +1,165 @@
+/*
+ * Copyright (C) 2002  David Howells (dhowells@redhat.com)
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_THREAD_INFO_H
+#define _ASM_TILE_THREAD_INFO_H
+
+#include <asm/processor.h>
+#include <asm/page.h>
+#ifndef __ASSEMBLY__
+
+/*
+ * Low level task data that assembly code needs immediate access to.
+ * The structure is placed at the bottom of the supervisor stack.
+ */
+struct thread_info {
+	struct task_struct	*task;		/* main task structure */
+	struct exec_domain	*exec_domain;	/* execution domain */
+	unsigned long		flags;		/* low level flags */
+	unsigned long		status;		/* thread-synchronous flags */
+	__u32			homecache_cpu;	/* CPU we are homecached on */
+	__u32			cpu;		/* current CPU */
+	int			preempt_count;	/* 0 => preemptable,
+						   <0 => BUG */
+
+	mm_segment_t		addr_limit;	/* thread address space
+						   (KERNEL_DS or USER_DS) */
+	struct restart_block	restart_block;
+	struct single_step_state *step_state;	/* single step state
+						   (if non-zero) */
+};
+
+/*
+ * macros/functions for gaining access to the thread information structure.
+ */
+#define INIT_THREAD_INFO(tsk)			\
+{						\
+	.task		= &tsk,			\
+	.exec_domain	= &default_exec_domain,	\
+	.flags		= 0,			\
+	.cpu		= 0,			\
+	.preempt_count	= INIT_PREEMPT_COUNT,	\
+	.addr_limit	= KERNEL_DS,		\
+	.restart_block	= {			\
+		.fn = do_no_restart_syscall,	\
+	},					\
+	.step_state	= 0,			\
+}
+
+#define init_thread_info	(init_thread_union.thread_info)
+#define init_stack		(init_thread_union.stack)
+
+#endif /* !__ASSEMBLY__ */
+
+#if PAGE_SIZE < 8192
+#define THREAD_SIZE_ORDER (13 - PAGE_SHIFT)
+#else
+#define THREAD_SIZE_ORDER (0)
+#endif
+
+#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
+#define LOG2_THREAD_SIZE (PAGE_SHIFT + THREAD_SIZE_ORDER)
+
+#define STACK_WARN             (THREAD_SIZE/8)
+
+#ifndef __ASSEMBLY__
+
+/* How to get the thread information struct from C. */
+register unsigned long stack_pointer __asm__("sp");
+
+#define current_thread_info() \
+  ((struct thread_info *)(stack_pointer & -THREAD_SIZE))
+
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+extern struct thread_info *alloc_thread_info(struct task_struct *task);
+extern void free_thread_info(struct thread_info *info);
+
+/* Switch boot idle thread to a freshly-allocated stack and free old stack. */
+extern void cpu_idle_on_new_stack(struct thread_info *old_ti,
+				  unsigned long new_sp,
+				  unsigned long new_ss10);
+
+#else /* __ASSEMBLY__ */
+
+/* how to get the thread information struct from ASM */
+#ifdef __tilegx__
+#define GET_THREAD_INFO(reg) move reg, sp; mm reg, zero, LOG2_THREAD_SIZE, 63
+#else
+#define GET_THREAD_INFO(reg) mm reg, sp, zero, LOG2_THREAD_SIZE, 31
+#endif
+
+#endif /* !__ASSEMBLY__ */
+
+#define PREEMPT_ACTIVE		0x10000000
+
+/*
+ * Thread information flags that various assembly files may need to access.
+ * Keep flags accessed frequently in low bits, particular since it makes
+ * it easier to build constants in assembly.
+ */
+#define TIF_SIGPENDING		0	/* signal pending */
+#define TIF_NEED_RESCHED	1	/* rescheduling necessary */
+#define TIF_SINGLESTEP		2	/* restore singlestep on return to
+					   user mode */
+#define TIF_ASYNC_TLB		3	/* got an async TLB fault in kernel */
+#define TIF_SYSCALL_TRACE	4	/* syscall trace active */
+#define TIF_SYSCALL_AUDIT	5	/* syscall auditing active */
+#define TIF_SECCOMP		6	/* secure computing */
+#define TIF_MEMDIE		7	/* OOM killer at work */
+
+#define _TIF_SIGPENDING		(1<<TIF_SIGPENDING)
+#define _TIF_NEED_RESCHED	(1<<TIF_NEED_RESCHED)
+#define _TIF_SINGLESTEP		(1<<TIF_SINGLESTEP)
+#define _TIF_ASYNC_TLB		(1<<TIF_ASYNC_TLB)
+#define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
+#define _TIF_SYSCALL_AUDIT	(1<<TIF_SYSCALL_AUDIT)
+#define _TIF_SECCOMP		(1<<TIF_SECCOMP)
+#define _TIF_MEMDIE		(1<<TIF_MEMDIE)
+
+/* Work to do on any return to user space. */
+#define _TIF_ALLWORK_MASK \
+  (_TIF_SIGPENDING|_TIF_NEED_RESCHED|_TIF_SINGLESTEP|_TIF_ASYNC_TLB)
+
+/*
+ * Thread-synchronous status.
+ *
+ * This is different from the flags in that nobody else
+ * ever touches our thread-synchronous status, so we don't
+ * have to worry about atomic accesses.
+ */
+#ifdef __tilegx__
+#define TS_COMPAT		0x0001	/* 32-bit compatibility mode */
+#endif
+#define TS_POLLING		0x0004	/* in idle loop but not sleeping */
+#define TS_RESTORE_SIGMASK	0x0008	/* restore signal mask in do_signal */
+#define TS_EXEC_HASH_SET	0x0010	/* apply TS_EXEC_HASH_xxx flags */
+#define TS_EXEC_HASH_RO		0x0020	/* during exec, hash r/o segments */
+#define TS_EXEC_HASH_RW		0x0040	/* during exec, hash r/w segments */
+#define TS_EXEC_HASH_STACK	0x0080	/* during exec, hash the stack */
+#define TS_EXEC_HASH_FLAGS	0x00f0	/* mask for TS_EXEC_HASH_xxx flags */
+
+#define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING)
+
+#ifndef __ASSEMBLY__
+#define HAVE_SET_RESTORE_SIGMASK	1
+static inline void set_restore_sigmask(void)
+{
+	struct thread_info *ti = current_thread_info();
+	ti->status |= TS_RESTORE_SIGMASK;
+	set_bit(TIF_SIGPENDING, &ti->flags);
+}
+#endif	/* !__ASSEMBLY__ */
+
+#endif /* _ASM_TILE_THREAD_INFO_H */
diff --git a/arch/tile/include/asm/timex.h b/arch/tile/include/asm/timex.h
new file mode 100644
index 0000000..3baf5fc
--- /dev/null
+++ b/arch/tile/include/asm/timex.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_TIMEX_H
+#define _ASM_TILE_TIMEX_H
+
+/*
+ * This rate should be a multiple of the possible HZ values (100, 250, 1000)
+ * and a fraction of the possible hardware timer frequencies.  Our timer
+ * frequency is highly tunable but also quite precise, so for the primary use
+ * of this value (setting ACT_HZ from HZ) we just pick a value that causes
+ * ACT_HZ to be set to HZ.  We make the value somewhat large just to be
+ * more robust in case someone tries out a new value of HZ.
+ */
+#define CLOCK_TICK_RATE	1000000
+
+typedef unsigned long long cycles_t;
+
+#if CHIP_HAS_SPLIT_CYCLE()
+cycles_t get_cycles(void);
+#else
+static inline cycles_t get_cycles(void)
+{
+	return __insn_mfspr(SPR_CYCLE);
+}
+#endif
+
+cycles_t get_clock_rate(void);
+
+/* Called at cpu initialization to set some low-level constants. */
+void setup_clock(void);
+
+/* Called at cpu initialization to start the tile-timer clock device. */
+void setup_tile_timer(void);
+
+#endif /* _ASM_TILE_TIMEX_H */
diff --git a/arch/tile/include/asm/tlb.h b/arch/tile/include/asm/tlb.h
new file mode 100644
index 0000000..4a891a1
--- /dev/null
+++ b/arch/tile/include/asm/tlb.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_TLB_H
+#define _ASM_TILE_TLB_H
+
+#define tlb_start_vma(tlb, vma) do { } while (0)
+#define tlb_end_vma(tlb, vma) do { } while (0)
+#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
+#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)
+
+#include <asm-generic/tlb.h>
+
+#endif /* _ASM_TILE_TLB_H */
diff --git a/arch/tile/include/asm/tlbflush.h b/arch/tile/include/asm/tlbflush.h
new file mode 100644
index 0000000..96199d2
--- /dev/null
+++ b/arch/tile/include/asm/tlbflush.h
@@ -0,0 +1,128 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_TLBFLUSH_H
+#define _ASM_TILE_TLBFLUSH_H
+
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <asm/cacheflush.h>
+#include <asm/page.h>
+#include <hv/hypervisor.h>
+
+/*
+ * Rather than associating each mm with its own ASID, we just use
+ * ASIDs to allow us to lazily flush the TLB when we switch mms.
+ * This way we only have to do an actual TLB flush on mm switch
+ * every time we wrap ASIDs, not every single time we switch.
+ *
+ * FIXME: We might improve performance by keeping ASIDs around
+ * properly, though since the hypervisor direct-maps VAs to TSB
+ * entries, we're likely to have lost at least the executable page
+ * mappings by the time we switch back to the original mm.
+ */
+DECLARE_PER_CPU(int, current_asid);
+
+/* The hypervisor tells us what ASIDs are available to us. */
+extern int min_asid, max_asid;
+
+static inline unsigned long hv_page_size(const struct vm_area_struct *vma)
+{
+	return (vma->vm_flags & VM_HUGETLB) ? HPAGE_SIZE : PAGE_SIZE;
+}
+
+/* Pass as vma pointer for non-executable mapping, if no vma available. */
+#define FLUSH_NONEXEC ((const struct vm_area_struct *)-1UL)
+
+/* Flush a single user page on this cpu. */
+static inline void local_flush_tlb_page(const struct vm_area_struct *vma,
+					unsigned long addr,
+					unsigned long page_size)
+{
+	int rc = hv_flush_page(addr, page_size);
+	if (rc < 0)
+		panic("hv_flush_page(%#lx,%#lx) failed: %d",
+		      addr, page_size, rc);
+	if (!vma || (vma != FLUSH_NONEXEC && (vma->vm_flags & VM_EXEC)))
+		__flush_icache();
+}
+
+/* Flush range of user pages on this cpu. */
+static inline void local_flush_tlb_pages(const struct vm_area_struct *vma,
+					 unsigned long addr,
+					 unsigned long page_size,
+					 unsigned long len)
+{
+	int rc = hv_flush_pages(addr, page_size, len);
+	if (rc < 0)
+		panic("hv_flush_pages(%#lx,%#lx,%#lx) failed: %d",
+		      addr, page_size, len, rc);
+	if (!vma || (vma != FLUSH_NONEXEC && (vma->vm_flags & VM_EXEC)))
+		__flush_icache();
+}
+
+/* Flush all user pages on this cpu. */
+static inline void local_flush_tlb(void)
+{
+	int rc = hv_flush_all(1);   /* preserve global mappings */
+	if (rc < 0)
+		panic("hv_flush_all(1) failed: %d", rc);
+	__flush_icache();
+}
+
+/*
+ * Global pages have to be flushed a bit differently. Not a real
+ * performance problem because this does not happen often.
+ */
+static inline void local_flush_tlb_all(void)
+{
+	int i;
+	for (i = 0; ; ++i) {
+		HV_VirtAddrRange r = hv_inquire_virtual(i);
+		if (r.size == 0)
+			break;
+		local_flush_tlb_pages(NULL, r.start, PAGE_SIZE, r.size);
+		local_flush_tlb_pages(NULL, r.start, HPAGE_SIZE, r.size);
+	}
+}
+
+/*
+ * TLB flushing:
+ *
+ *  - flush_tlb() flushes the current mm struct TLBs
+ *  - flush_tlb_all() flushes all processes TLBs
+ *  - flush_tlb_mm(mm) flushes the specified mm context TLB's
+ *  - flush_tlb_page(vma, vmaddr) flushes one page
+ *  - flush_tlb_range(vma, start, end) flushes a range of pages
+ *  - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
+ *  - flush_tlb_others(cpumask, mm, va) flushes TLBs on other cpus
+ *
+ * Here (as in vm_area_struct), "end" means the first byte after
+ * our end address.
+ */
+
+extern void flush_tlb_all(void);
+extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
+extern void flush_tlb_current_task(void);
+extern void flush_tlb_mm(struct mm_struct *);
+extern void flush_tlb_page(const struct vm_area_struct *, unsigned long);
+extern void flush_tlb_page_mm(const struct vm_area_struct *,
+			      struct mm_struct *, unsigned long);
+extern void flush_tlb_range(const struct vm_area_struct *,
+			    unsigned long start, unsigned long end);
+
+#define flush_tlb()     flush_tlb_current_task()
+
+#endif /* _ASM_TILE_TLBFLUSH_H */
diff --git a/arch/tile/include/asm/topology.h b/arch/tile/include/asm/topology.h
new file mode 100644
index 0000000..343172d
--- /dev/null
+++ b/arch/tile/include/asm/topology.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_TOPOLOGY_H
+#define _ASM_TILE_TOPOLOGY_H
+
+#ifdef CONFIG_NUMA
+
+#include <linux/cpumask.h>
+
+/* Mappings between logical cpu number and node number. */
+extern struct cpumask node_2_cpu_mask[];
+extern char cpu_2_node[];
+
+/* Returns the number of the node containing CPU 'cpu'. */
+static inline int cpu_to_node(int cpu)
+{
+	return cpu_2_node[cpu];
+}
+
+/*
+ * Returns the number of the node containing Node 'node'.
+ * This architecture is flat, so it is a pretty simple function!
+ */
+#define parent_node(node) (node)
+
+/* Returns a bitmask of CPUs on Node 'node'. */
+static inline const struct cpumask *cpumask_of_node(int node)
+{
+	return &node_2_cpu_mask[node];
+}
+
+/* For now, use numa node -1 for global allocation. */
+#define pcibus_to_node(bus)		((void)(bus), -1)
+
+/* sched_domains SD_NODE_INIT for TILE architecture */
+#define SD_NODE_INIT (struct sched_domain) {		\
+	.min_interval		= 8,			\
+	.max_interval		= 32,			\
+	.busy_factor		= 32,			\
+	.imbalance_pct		= 125,			\
+	.cache_nice_tries	= 1,			\
+	.busy_idx		= 3,			\
+	.idle_idx		= 1,			\
+	.newidle_idx		= 2,			\
+	.wake_idx		= 1,			\
+	.flags			= SD_LOAD_BALANCE	\
+				| SD_BALANCE_NEWIDLE	\
+				| SD_BALANCE_EXEC	\
+				| SD_BALANCE_FORK	\
+				| SD_WAKE_AFFINE	\
+				| SD_SERIALIZE,		\
+	.last_balance		= jiffies,		\
+	.balance_interval	= 1,			\
+}
+
+/* By definition, we create nodes based on online memory. */
+#define node_has_online_mem(nid) 1
+
+#endif /* CONFIG_NUMA */
+
+#include <asm-generic/topology.h>
+
+#ifdef CONFIG_SMP
+#define topology_physical_package_id(cpu)       ((void)(cpu), 0)
+#define topology_core_id(cpu)                   (cpu)
+#define topology_core_cpumask(cpu)              ((void)(cpu), cpu_online_mask)
+#define topology_thread_cpumask(cpu)            cpumask_of(cpu)
+
+/* indicates that pointers to the topology struct cpumask maps are valid */
+#define arch_provides_topology_pointers         yes
+#endif
+
+#endif /* _ASM_TILE_TOPOLOGY_H */
diff --git a/arch/tile/include/asm/traps.h b/arch/tile/include/asm/traps.h
new file mode 100644
index 0000000..eab33d4
--- /dev/null
+++ b/arch/tile/include/asm/traps.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_TRAPS_H
+#define _ASM_TILE_TRAPS_H
+
+/* mm/fault.c */
+void do_page_fault(struct pt_regs *, int fault_num,
+		   unsigned long address, unsigned long write);
+
+/* kernel/traps.c */
+void do_trap(struct pt_regs *, int fault_num, unsigned long reason);
+
+/* kernel/time.c */
+void do_timer_interrupt(struct pt_regs *, int fault_num);
+
+/* kernel/messaging.c */
+void hv_message_intr(struct pt_regs *, int intnum);
+
+/* kernel/irq.c */
+void tile_dev_intr(struct pt_regs *, int intnum);
+
+
+
+#endif /* _ASM_TILE_SYSCALLS_H */
diff --git a/arch/tile/include/asm/types.h b/arch/tile/include/asm/types.h
new file mode 100644
index 0000000..b9e79bc
--- /dev/null
+++ b/arch/tile/include/asm/types.h
@@ -0,0 +1 @@
+#include <asm-generic/types.h>
diff --git a/arch/tile/include/asm/uaccess.h b/arch/tile/include/asm/uaccess.h
new file mode 100644
index 0000000..f3058af
--- /dev/null
+++ b/arch/tile/include/asm/uaccess.h
@@ -0,0 +1,578 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_UACCESS_H
+#define _ASM_TILE_UACCESS_H
+
+/*
+ * User space memory access functions
+ */
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <asm-generic/uaccess-unaligned.h>
+#include <asm/processor.h>
+#include <asm/page.h>
+
+#define VERIFY_READ	0
+#define VERIFY_WRITE	1
+
+/*
+ * The fs value determines whether argument validity checking should be
+ * performed or not.  If get_fs() == USER_DS, checking is performed, with
+ * get_fs() == KERNEL_DS, checking is bypassed.
+ *
+ * For historical reasons, these macros are grossly misnamed.
+ */
+#define MAKE_MM_SEG(a)  ((mm_segment_t) { (a) })
+
+#define KERNEL_DS	MAKE_MM_SEG(-1UL)
+#define USER_DS		MAKE_MM_SEG(PAGE_OFFSET)
+
+#define get_ds()	(KERNEL_DS)
+#define get_fs()	(current_thread_info()->addr_limit)
+#define set_fs(x)	(current_thread_info()->addr_limit = (x))
+
+#define segment_eq(a, b) ((a).seg == (b).seg)
+
+#ifndef __tilegx__
+/*
+ * We could allow mapping all 16 MB at 0xfc000000, but we set up a
+ * special hack in arch_setup_additional_pages() to auto-create a mapping
+ * for the first 16 KB, and it would seem strange to have different
+ * user-accessible semantics for memory at 0xfc000000 and above 0xfc004000.
+ */
+static inline int is_arch_mappable_range(unsigned long addr,
+					 unsigned long size)
+{
+	return (addr >= MEM_USER_INTRPT &&
+		addr < (MEM_USER_INTRPT + INTRPT_SIZE) &&
+		size <= (MEM_USER_INTRPT + INTRPT_SIZE) - addr);
+}
+#define is_arch_mappable_range is_arch_mappable_range
+#else
+#define is_arch_mappable_range(addr, size) 0
+#endif
+
+/*
+ * Test whether a block of memory is a valid user space address.
+ * Returns 0 if the range is valid, nonzero otherwise.
+ */
+int __range_ok(unsigned long addr, unsigned long size);
+
+/**
+ * access_ok: - Checks if a user space pointer is valid
+ * @type: Type of access: %VERIFY_READ or %VERIFY_WRITE.  Note that
+ *        %VERIFY_WRITE is a superset of %VERIFY_READ - if it is safe
+ *        to write to a block, it is always safe to read from it.
+ * @addr: User space pointer to start of block to check
+ * @size: Size of block to check
+ *
+ * Context: User context only.  This function may sleep.
+ *
+ * Checks if a pointer to a block of memory in user space is valid.
+ *
+ * Returns true (nonzero) if the memory block may be valid, false (zero)
+ * if it is definitely invalid.
+ *
+ * Note that, depending on architecture, this function probably just
+ * checks that the pointer is in the user space range - after calling
+ * this function, memory access functions may still return -EFAULT.
+ */
+#define access_ok(type, addr, size) \
+	(likely(__range_ok((unsigned long)addr, size) == 0))
+
+/*
+ * The exception table consists of pairs of addresses: the first is the
+ * address of an instruction that is allowed to fault, and the second is
+ * the address at which the program should continue.  No registers are
+ * modified, so it is entirely up to the continuation code to figure out
+ * what to do.
+ *
+ * All the routines below use bits of fixup code that are out of line
+ * with the main instruction path.  This means when everything is well,
+ * we don't even have to jump over them.  Further, they do not intrude
+ * on our cache or tlb entries.
+ */
+
+struct exception_table_entry {
+	unsigned long insn, fixup;
+};
+
+extern int fixup_exception(struct pt_regs *regs);
+
+/*
+ * We return the __get_user_N function results in a structure,
+ * thus in r0 and r1.  If "err" is zero, "val" is the result
+ * of the read; otherwise, "err" is -EFAULT.
+ *
+ * We rarely need 8-byte values on a 32-bit architecture, but
+ * we size the structure to accommodate.  In practice, for the
+ * the smaller reads, we can zero the high word for free, and
+ * the caller will ignore it by virtue of casting anyway.
+ */
+struct __get_user {
+	unsigned long long val;
+	int err;
+};
+
+/*
+ * FIXME: we should express these as inline extended assembler, since
+ * they're fundamentally just a variable dereference and some
+ * supporting exception_table gunk.  Note that (a la i386) we can
+ * extend the copy_to_user and copy_from_user routines to call into
+ * such extended assembler routines, though we will have to use a
+ * different return code in that case (1, 2, or 4, rather than -EFAULT).
+ */
+extern struct __get_user __get_user_1(const void *);
+extern struct __get_user __get_user_2(const void *);
+extern struct __get_user __get_user_4(const void *);
+extern struct __get_user __get_user_8(const void *);
+extern int __put_user_1(long, void *);
+extern int __put_user_2(long, void *);
+extern int __put_user_4(long, void *);
+extern int __put_user_8(long long, void *);
+
+/* Unimplemented routines to cause linker failures */
+extern struct __get_user __get_user_bad(void);
+extern int __put_user_bad(void);
+
+/*
+ * Careful: we have to cast the result to the type of the pointer
+ * for sign reasons.
+ */
+/**
+ * __get_user: - Get a simple variable from user space, with less checking.
+ * @x:   Variable to store result.
+ * @ptr: Source address, in user space.
+ *
+ * Context: User context only.  This function may sleep.
+ *
+ * This macro copies a single simple variable from user space to kernel
+ * space.  It supports simple types like char and int, but not larger
+ * data types like structures or arrays.
+ *
+ * @ptr must have pointer-to-simple-variable type, and the result of
+ * dereferencing @ptr must be assignable to @x without a cast.
+ *
+ * Returns zero on success, or -EFAULT on error.
+ * On error, the variable @x is set to zero.
+ *
+ * Caller must check the pointer with access_ok() before calling this
+ * function.
+ */
+#define __get_user(x, ptr)						\
+({	struct __get_user __ret;					\
+	__typeof__(*(ptr)) const __user *__gu_addr = (ptr);		\
+	__chk_user_ptr(__gu_addr);					\
+	switch (sizeof(*(__gu_addr))) {					\
+	case 1:								\
+		__ret = __get_user_1(__gu_addr);			\
+		break;							\
+	case 2:								\
+		__ret = __get_user_2(__gu_addr);			\
+		break;							\
+	case 4:								\
+		__ret = __get_user_4(__gu_addr);			\
+		break;							\
+	case 8:								\
+		__ret = __get_user_8(__gu_addr);			\
+		break;							\
+	default:							\
+		__ret = __get_user_bad();				\
+		break;							\
+	}								\
+	(x) = (__typeof__(*__gu_addr)) (__typeof__(*__gu_addr - *__gu_addr)) \
+	  __ret.val;			                                \
+	__ret.err;							\
+})
+
+/**
+ * __put_user: - Write a simple value into user space, with less checking.
+ * @x:   Value to copy to user space.
+ * @ptr: Destination address, in user space.
+ *
+ * Context: User context only.  This function may sleep.
+ *
+ * This macro copies a single simple value from kernel space to user
+ * space.  It supports simple types like char and int, but not larger
+ * data types like structures or arrays.
+ *
+ * @ptr must have pointer-to-simple-variable type, and @x must be assignable
+ * to the result of dereferencing @ptr.
+ *
+ * Caller must check the pointer with access_ok() before calling this
+ * function.
+ *
+ * Returns zero on success, or -EFAULT on error.
+ *
+ * Implementation note: The "case 8" logic of casting to the type of
+ * the result of subtracting the value from itself is basically a way
+ * of keeping all integer types the same, but casting any pointers to
+ * ptrdiff_t, i.e. also an integer type.  This way there are no
+ * questionable casts seen by the compiler on an ILP32 platform.
+ */
+#define __put_user(x, ptr)						\
+({									\
+	int __pu_err = 0;						\
+	__typeof__(*(ptr)) __user *__pu_addr = (ptr);			\
+	typeof(*__pu_addr) __pu_val = (x);				\
+	__chk_user_ptr(__pu_addr);					\
+	switch (sizeof(__pu_val)) {					\
+	case 1:								\
+		__pu_err = __put_user_1((long)__pu_val, __pu_addr);	\
+		break;							\
+	case 2:								\
+		__pu_err = __put_user_2((long)__pu_val, __pu_addr);	\
+		break;							\
+	case 4:								\
+		__pu_err = __put_user_4((long)__pu_val, __pu_addr);	\
+		break;							\
+	case 8:								\
+		__pu_err =						\
+		  __put_user_8((__typeof__(__pu_val - __pu_val))__pu_val,\
+			__pu_addr);					\
+		break;							\
+	default:							\
+		__pu_err = __put_user_bad();				\
+		break;							\
+	}								\
+	__pu_err;							\
+})
+
+/*
+ * The versions of get_user and put_user without initial underscores
+ * check the address of their arguments to make sure they are not
+ * in kernel space.
+ */
+#define put_user(x, ptr)						\
+({									\
+	__typeof__(*(ptr)) __user *__Pu_addr = (ptr);			\
+	access_ok(VERIFY_WRITE, (__Pu_addr), sizeof(*(__Pu_addr))) ?	\
+		__put_user((x), (__Pu_addr)) :				\
+		-EFAULT;						\
+})
+
+#define get_user(x, ptr)						\
+({									\
+	__typeof__(*(ptr)) const __user *__Gu_addr = (ptr);		\
+	access_ok(VERIFY_READ, (__Gu_addr), sizeof(*(__Gu_addr))) ?	\
+		__get_user((x), (__Gu_addr)) :				\
+		((x) = 0, -EFAULT);					\
+})
+
+/**
+ * __copy_to_user() - copy data into user space, with less checking.
+ * @to:   Destination address, in user space.
+ * @from: Source address, in kernel space.
+ * @n:    Number of bytes to copy.
+ *
+ * Context: User context only.  This function may sleep.
+ *
+ * Copy data from kernel space to user space.  Caller must check
+ * the specified block with access_ok() before calling this function.
+ *
+ * Returns number of bytes that could not be copied.
+ * On success, this will be zero.
+ *
+ * An alternate version - __copy_to_user_inatomic() - is designed
+ * to be called from atomic context, typically bracketed by calls
+ * to pagefault_disable() and pagefault_enable().
+ */
+extern unsigned long __must_check __copy_to_user_inatomic(
+	void __user *to, const void *from, unsigned long n);
+
+static inline unsigned long __must_check
+__copy_to_user(void __user *to, const void *from, unsigned long n)
+{
+	might_fault();
+	return __copy_to_user_inatomic(to, from, n);
+}
+
+static inline unsigned long __must_check
+copy_to_user(void __user *to, const void *from, unsigned long n)
+{
+	if (access_ok(VERIFY_WRITE, to, n))
+		n = __copy_to_user(to, from, n);
+	return n;
+}
+
+/**
+ * __copy_from_user() - copy data from user space, with less checking.
+ * @to:   Destination address, in kernel space.
+ * @from: Source address, in user space.
+ * @n:    Number of bytes to copy.
+ *
+ * Context: User context only.  This function may sleep.
+ *
+ * Copy data from user space to kernel space.  Caller must check
+ * the specified block with access_ok() before calling this function.
+ *
+ * Returns number of bytes that could not be copied.
+ * On success, this will be zero.
+ *
+ * If some data could not be copied, this function will pad the copied
+ * data to the requested size using zero bytes.
+ *
+ * An alternate version - __copy_from_user_inatomic() - is designed
+ * to be called from atomic context, typically bracketed by calls
+ * to pagefault_disable() and pagefault_enable().  This version
+ * does *NOT* pad with zeros.
+ */
+extern unsigned long __must_check __copy_from_user_inatomic(
+	void *to, const void __user *from, unsigned long n);
+extern unsigned long __must_check __copy_from_user_zeroing(
+	void *to, const void __user *from, unsigned long n);
+
+static inline unsigned long __must_check
+__copy_from_user(void *to, const void __user *from, unsigned long n)
+{
+       might_fault();
+       return __copy_from_user_zeroing(to, from, n);
+}
+
+static inline unsigned long __must_check
+_copy_from_user(void *to, const void __user *from, unsigned long n)
+{
+	if (access_ok(VERIFY_READ, from, n))
+		n = __copy_from_user(to, from, n);
+	else
+		memset(to, 0, n);
+	return n;
+}
+
+#ifdef CONFIG_DEBUG_COPY_FROM_USER
+extern void copy_from_user_overflow(void)
+	__compiletime_warning("copy_from_user() size is not provably correct");
+
+static inline unsigned long __must_check copy_from_user(void *to,
+					  const void __user *from,
+					  unsigned long n)
+{
+	int sz = __compiletime_object_size(to);
+
+	if (likely(sz == -1 || sz >= n))
+		n = _copy_from_user(to, from, n);
+	else
+		copy_from_user_overflow();
+
+	return n;
+}
+#else
+#define copy_from_user _copy_from_user
+#endif
+
+#ifdef __tilegx__
+/**
+ * __copy_in_user() - copy data within user space, with less checking.
+ * @to:   Destination address, in user space.
+ * @from: Source address, in kernel space.
+ * @n:    Number of bytes to copy.
+ *
+ * Context: User context only.  This function may sleep.
+ *
+ * Copy data from user space to user space.  Caller must check
+ * the specified blocks with access_ok() before calling this function.
+ *
+ * Returns number of bytes that could not be copied.
+ * On success, this will be zero.
+ */
+extern unsigned long __copy_in_user_asm(
+	void __user *to, const void __user *from, unsigned long n);
+
+static inline unsigned long __must_check
+__copy_in_user(void __user *to, const void __user *from, unsigned long n)
+{
+	might_sleep();
+	return __copy_in_user_asm(to, from, n);
+}
+
+static inline unsigned long __must_check
+copy_in_user(void __user *to, const void __user *from, unsigned long n)
+{
+	if (access_ok(VERIFY_WRITE, to, n) && access_ok(VERIFY_READ, from, n))
+		n = __copy_in_user(to, from, n);
+	return n;
+}
+#endif
+
+
+/**
+ * strlen_user: - Get the size of a string in user space.
+ * @str: The string to measure.
+ *
+ * Context: User context only.  This function may sleep.
+ *
+ * Get the size of a NUL-terminated string in user space.
+ *
+ * Returns the size of the string INCLUDING the terminating NUL.
+ * On exception, returns 0.
+ *
+ * If there is a limit on the length of a valid string, you may wish to
+ * consider using strnlen_user() instead.
+ */
+extern long strnlen_user_asm(const char __user *str, long n);
+static inline long __must_check strnlen_user(const char __user *str, long n)
+{
+	might_fault();
+	return strnlen_user_asm(str, n);
+}
+#define strlen_user(str) strnlen_user(str, LONG_MAX)
+
+/**
+ * strncpy_from_user: - Copy a NUL terminated string from userspace, with less checking.
+ * @dst:   Destination address, in kernel space.  This buffer must be at
+ *         least @count bytes long.
+ * @src:   Source address, in user space.
+ * @count: Maximum number of bytes to copy, including the trailing NUL.
+ *
+ * Copies a NUL-terminated string from userspace to kernel space.
+ * Caller must check the specified block with access_ok() before calling
+ * this function.
+ *
+ * On success, returns the length of the string (not including the trailing
+ * NUL).
+ *
+ * If access to userspace fails, returns -EFAULT (some data may have been
+ * copied).
+ *
+ * If @count is smaller than the length of the string, copies @count bytes
+ * and returns @count.
+ */
+extern long strncpy_from_user_asm(char *dst, const char __user *src, long);
+static inline long __must_check __strncpy_from_user(
+	char *dst, const char __user *src, long count)
+{
+	might_fault();
+	return strncpy_from_user_asm(dst, src, count);
+}
+static inline long __must_check strncpy_from_user(
+	char *dst, const char __user *src, long count)
+{
+	if (access_ok(VERIFY_READ, src, 1))
+		return __strncpy_from_user(dst, src, count);
+	return -EFAULT;
+}
+
+/**
+ * clear_user: - Zero a block of memory in user space.
+ * @mem:   Destination address, in user space.
+ * @len:   Number of bytes to zero.
+ *
+ * Zero a block of memory in user space.
+ *
+ * Returns number of bytes that could not be cleared.
+ * On success, this will be zero.
+ */
+extern unsigned long clear_user_asm(void __user *mem, unsigned long len);
+static inline unsigned long __must_check __clear_user(
+	void __user *mem, unsigned long len)
+{
+	might_fault();
+	return clear_user_asm(mem, len);
+}
+static inline unsigned long __must_check clear_user(
+	void __user *mem, unsigned long len)
+{
+	if (access_ok(VERIFY_WRITE, mem, len))
+		return __clear_user(mem, len);
+	return len;
+}
+
+/**
+ * flush_user: - Flush a block of memory in user space from cache.
+ * @mem:   Destination address, in user space.
+ * @len:   Number of bytes to flush.
+ *
+ * Returns number of bytes that could not be flushed.
+ * On success, this will be zero.
+ */
+extern unsigned long flush_user_asm(void __user *mem, unsigned long len);
+static inline unsigned long __must_check __flush_user(
+	void __user *mem, unsigned long len)
+{
+	int retval;
+
+	might_fault();
+	retval = flush_user_asm(mem, len);
+	mb_incoherent();
+	return retval;
+}
+
+static inline unsigned long __must_check flush_user(
+	void __user *mem, unsigned long len)
+{
+	if (access_ok(VERIFY_WRITE, mem, len))
+		return __flush_user(mem, len);
+	return len;
+}
+
+/**
+ * inv_user: - Invalidate a block of memory in user space from cache.
+ * @mem:   Destination address, in user space.
+ * @len:   Number of bytes to invalidate.
+ *
+ * Returns number of bytes that could not be invalidated.
+ * On success, this will be zero.
+ *
+ * Note that on Tile64, the "inv" operation is in fact a
+ * "flush and invalidate", so cache write-backs will occur prior
+ * to the cache being marked invalid.
+ */
+extern unsigned long inv_user_asm(void __user *mem, unsigned long len);
+static inline unsigned long __must_check __inv_user(
+	void __user *mem, unsigned long len)
+{
+	int retval;
+
+	might_fault();
+	retval = inv_user_asm(mem, len);
+	mb_incoherent();
+	return retval;
+}
+static inline unsigned long __must_check inv_user(
+	void __user *mem, unsigned long len)
+{
+	if (access_ok(VERIFY_WRITE, mem, len))
+		return __inv_user(mem, len);
+	return len;
+}
+
+/**
+ * finv_user: - Flush-inval a block of memory in user space from cache.
+ * @mem:   Destination address, in user space.
+ * @len:   Number of bytes to invalidate.
+ *
+ * Returns number of bytes that could not be flush-invalidated.
+ * On success, this will be zero.
+ */
+extern unsigned long finv_user_asm(void __user *mem, unsigned long len);
+static inline unsigned long __must_check __finv_user(
+	void __user *mem, unsigned long len)
+{
+	int retval;
+
+	might_fault();
+	retval = finv_user_asm(mem, len);
+	mb_incoherent();
+	return retval;
+}
+static inline unsigned long __must_check finv_user(
+	void __user *mem, unsigned long len)
+{
+	if (access_ok(VERIFY_WRITE, mem, len))
+		return __finv_user(mem, len);
+	return len;
+}
+
+#endif /* _ASM_TILE_UACCESS_H */
diff --git a/arch/tile/include/asm/ucontext.h b/arch/tile/include/asm/ucontext.h
new file mode 100644
index 0000000..9bc07b9
--- /dev/null
+++ b/arch/tile/include/asm/ucontext.h
@@ -0,0 +1 @@
+#include <asm-generic/ucontext.h>
diff --git a/arch/tile/include/asm/unaligned.h b/arch/tile/include/asm/unaligned.h
new file mode 100644
index 0000000..137e2de
--- /dev/null
+++ b/arch/tile/include/asm/unaligned.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_UNALIGNED_H
+#define _ASM_TILE_UNALIGNED_H
+
+#include <linux/unaligned/le_struct.h>
+#include <linux/unaligned/be_byteshift.h>
+#include <linux/unaligned/generic.h>
+#define get_unaligned	__get_unaligned_le
+#define put_unaligned	__put_unaligned_le
+
+#endif /* _ASM_TILE_UNALIGNED_H */
diff --git a/arch/tile/include/asm/unistd.h b/arch/tile/include/asm/unistd.h
new file mode 100644
index 0000000..03b3d5d
--- /dev/null
+++ b/arch/tile/include/asm/unistd.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#if !defined(_ASM_TILE_UNISTD_H) || defined(__SYSCALL)
+#define _ASM_TILE_UNISTD_H
+
+
+#ifndef __LP64__
+/* Use the flavor of this syscall that matches the 32-bit API better. */
+#define __ARCH_WANT_SYNC_FILE_RANGE2
+#endif
+
+/* Use the standard ABI for syscalls. */
+#include <asm-generic/unistd.h>
+
+#ifndef __tilegx__
+/* "Fast" syscalls provide atomic support for 32-bit chips. */
+#define __NR_FAST_cmpxchg	-1
+#define __NR_FAST_atomic_update	-2
+#define __NR_FAST_cmpxchg64	-3
+#define __NR_cmpxchg_badaddr	(__NR_arch_specific_syscall + 0)
+__SYSCALL(__NR_cmpxchg_badaddr, sys_cmpxchg_badaddr)
+#endif
+
+/* Additional Tilera-specific syscalls. */
+#define __NR_flush_cache	(__NR_arch_specific_syscall + 1)
+__SYSCALL(__NR_flush_cache, sys_flush_cache)
+
+#ifdef __KERNEL__
+/* In compat mode, we use sys_llseek() for compat_sys_llseek(). */
+#ifdef CONFIG_COMPAT
+#define __ARCH_WANT_SYS_LLSEEK
+#endif
+#endif
+
+#endif /* _ASM_TILE_UNISTD_H */
diff --git a/arch/tile/include/asm/user.h b/arch/tile/include/asm/user.h
new file mode 100644
index 0000000..cbc8b4d
--- /dev/null
+++ b/arch/tile/include/asm/user.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ */
+
+#ifndef _ASM_TILE_USER_H
+#define _ASM_TILE_USER_H
+
+/* This header is for a.out file formats, which TILE does not support. */
+
+#endif /* _ASM_TILE_USER_H */
diff --git a/arch/tile/include/asm/xor.h b/arch/tile/include/asm/xor.h
new file mode 100644
index 0000000..c82eb12
--- /dev/null
+++ b/arch/tile/include/asm/xor.h
@@ -0,0 +1 @@
+#include <asm-generic/xor.h>
diff --git a/arch/tile/include/hv/drv_pcie_rc_intf.h b/arch/tile/include/hv/drv_pcie_rc_intf.h
new file mode 100644
index 0000000..9bd2243
--- /dev/null
+++ b/arch/tile/include/hv/drv_pcie_rc_intf.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+/**
+ * @file drv_pcie_rc_intf.h
+ * Interface definitions for the PCIE Root Complex.
+ */
+
+#ifndef _SYS_HV_DRV_PCIE_RC_INTF_H
+#define _SYS_HV_DRV_PCIE_RC_INTF_H
+
+/** File offset for reading the interrupt base number used for PCIE legacy
+    interrupts and PLX Gen 1 requirement flag */
+#define PCIE_RC_CONFIG_MASK_OFF 0
+
+
+/**
+ * Structure used for obtaining PCIe config information, read from the PCIE
+ * subsystem /ctl file at initialization
+ */
+typedef struct pcie_rc_config
+{
+  int intr;                     /**< interrupt number used for downcall */
+  int plx_gen1;                 /**< flag for PLX Gen 1 configuration */
+} pcie_rc_config_t;
+
+#endif  /* _SYS_HV_DRV_PCIE_RC_INTF_H */
diff --git a/arch/tile/include/hv/hypervisor.h b/arch/tile/include/hv/hypervisor.h
new file mode 100644
index 0000000..84b3155
--- /dev/null
+++ b/arch/tile/include/hv/hypervisor.h
@@ -0,0 +1,2366 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+/**
+ * @file hypervisor.h
+ * The hypervisor's public API.
+ */
+
+#ifndef _TILE_HV_H
+#define _TILE_HV_H
+
+#ifdef __tile__
+#include <arch/chip.h>
+#else
+/* HACK: Allow use by "tools/cpack/". */
+#include "install/include/arch/chip.h"
+#endif
+
+/* Linux builds want unsigned long constants, but assembler wants numbers */
+#ifdef __ASSEMBLER__
+/** One, for assembler */
+#define __HV_SIZE_ONE 1
+#elif !defined(__tile__) && CHIP_VA_WIDTH() > 32
+/** One, for 64-bit on host */
+#define __HV_SIZE_ONE 1ULL
+#else
+/** One, for Linux */
+#define __HV_SIZE_ONE 1UL
+#endif
+
+
+/** The log2 of the span of a level-1 page table, in bytes.
+ */
+#define HV_LOG2_L1_SPAN 32
+
+/** The span of a level-1 page table, in bytes.
+ */
+#define HV_L1_SPAN (__HV_SIZE_ONE << HV_LOG2_L1_SPAN)
+
+/** The log2 of the size of small pages, in bytes. This value should
+ * be verified at runtime by calling hv_sysconf(HV_SYSCONF_PAGE_SIZE_SMALL).
+ */
+#define HV_LOG2_PAGE_SIZE_SMALL 16
+
+/** The size of small pages, in bytes. This value should be verified
+ * at runtime by calling hv_sysconf(HV_SYSCONF_PAGE_SIZE_SMALL).
+ */
+#define HV_PAGE_SIZE_SMALL (__HV_SIZE_ONE << HV_LOG2_PAGE_SIZE_SMALL)
+
+/** The log2 of the size of large pages, in bytes. This value should be
+ * verified at runtime by calling hv_sysconf(HV_SYSCONF_PAGE_SIZE_LARGE).
+ */
+#define HV_LOG2_PAGE_SIZE_LARGE 24
+
+/** The size of large pages, in bytes. This value should be verified
+ * at runtime by calling hv_sysconf(HV_SYSCONF_PAGE_SIZE_LARGE).
+ */
+#define HV_PAGE_SIZE_LARGE (__HV_SIZE_ONE << HV_LOG2_PAGE_SIZE_LARGE)
+
+/** The log2 of the granularity at which page tables must be aligned;
+ *  in other words, the CPA for a page table must have this many zero
+ *  bits at the bottom of the address.
+ */
+#define HV_LOG2_PAGE_TABLE_ALIGN 11
+
+/** The granularity at which page tables must be aligned.
+ */
+#define HV_PAGE_TABLE_ALIGN (__HV_SIZE_ONE << HV_LOG2_PAGE_TABLE_ALIGN)
+
+/** Normal start of hypervisor glue in client physical memory. */
+#define HV_GLUE_START_CPA 0x10000
+
+/** This much space is reserved at HV_GLUE_START_CPA
+ * for the hypervisor glue. The client program must start at
+ * some address higher than this, and in particular the address of
+ * its text section should be equal to zero modulo HV_PAGE_SIZE_LARGE
+ * so that relative offsets to the HV glue are correct.
+ */
+#define HV_GLUE_RESERVED_SIZE 0x10000
+
+/** Each entry in the hv dispatch array takes this many bytes. */
+#define HV_DISPATCH_ENTRY_SIZE 32
+
+/** Version of the hypervisor interface defined by this file */
+#define _HV_VERSION 10
+
+/* Index into hypervisor interface dispatch code blocks.
+ *
+ * Hypervisor calls are invoked from user space by calling code
+ * at an address HV_BASE_ADDRESS + (index) * HV_DISPATCH_ENTRY_SIZE,
+ * where index is one of these enum values.
+ *
+ * Normally a supervisor is expected to produce a set of symbols
+ * starting at HV_BASE_ADDRESS that obey this convention, but a user
+ * program could call directly through function pointers if desired.
+ *
+ * These numbers are part of the binary API and will not be changed
+ * without updating HV_VERSION, which should be a rare event.
+ */
+
+/** reserved. */
+#define _HV_DISPATCH_RESERVED                     0
+
+/** hv_init  */
+#define HV_DISPATCH_INIT                          1
+
+/** hv_install_context */
+#define HV_DISPATCH_INSTALL_CONTEXT               2
+
+/** hv_sysconf */
+#define HV_DISPATCH_SYSCONF                       3
+
+/** hv_get_rtc */
+#define HV_DISPATCH_GET_RTC                       4
+
+/** hv_set_rtc */
+#define HV_DISPATCH_SET_RTC                       5
+
+/** hv_flush_asid */
+#define HV_DISPATCH_FLUSH_ASID                    6
+
+/** hv_flush_page */
+#define HV_DISPATCH_FLUSH_PAGE                    7
+
+/** hv_flush_pages */
+#define HV_DISPATCH_FLUSH_PAGES                   8
+
+/** hv_restart */
+#define HV_DISPATCH_RESTART                       9
+
+/** hv_halt */
+#define HV_DISPATCH_HALT                          10
+
+/** hv_power_off */
+#define HV_DISPATCH_POWER_OFF                     11
+
+/** hv_inquire_physical */
+#define HV_DISPATCH_INQUIRE_PHYSICAL              12
+
+/** hv_inquire_memory_controller */
+#define HV_DISPATCH_INQUIRE_MEMORY_CONTROLLER     13
+
+/** hv_inquire_virtual */
+#define HV_DISPATCH_INQUIRE_VIRTUAL               14
+
+/** hv_inquire_asid */
+#define HV_DISPATCH_INQUIRE_ASID                  15
+
+/** hv_nanosleep */
+#define HV_DISPATCH_NANOSLEEP                     16
+
+/** hv_console_read_if_ready */
+#define HV_DISPATCH_CONSOLE_READ_IF_READY         17
+
+/** hv_console_write */
+#define HV_DISPATCH_CONSOLE_WRITE                 18
+
+/** hv_downcall_dispatch */
+#define HV_DISPATCH_DOWNCALL_DISPATCH             19
+
+/** hv_inquire_topology */
+#define HV_DISPATCH_INQUIRE_TOPOLOGY              20
+
+/** hv_fs_findfile */
+#define HV_DISPATCH_FS_FINDFILE                   21
+
+/** hv_fs_fstat */
+#define HV_DISPATCH_FS_FSTAT                      22
+
+/** hv_fs_pread */
+#define HV_DISPATCH_FS_PREAD                      23
+
+/** hv_physaddr_read64 */
+#define HV_DISPATCH_PHYSADDR_READ64               24
+
+/** hv_physaddr_write64 */
+#define HV_DISPATCH_PHYSADDR_WRITE64              25
+
+/** hv_get_command_line */
+#define HV_DISPATCH_GET_COMMAND_LINE              26
+
+/** hv_set_caching */
+#define HV_DISPATCH_SET_CACHING                   27
+
+/** hv_bzero_page */
+#define HV_DISPATCH_BZERO_PAGE                    28
+
+/** hv_register_message_state */
+#define HV_DISPATCH_REGISTER_MESSAGE_STATE        29
+
+/** hv_send_message */
+#define HV_DISPATCH_SEND_MESSAGE                  30
+
+/** hv_receive_message */
+#define HV_DISPATCH_RECEIVE_MESSAGE               31
+
+/** hv_inquire_context */
+#define HV_DISPATCH_INQUIRE_CONTEXT               32
+
+/** hv_start_all_tiles */
+#define HV_DISPATCH_START_ALL_TILES               33
+
+/** hv_dev_open */
+#define HV_DISPATCH_DEV_OPEN                      34
+
+/** hv_dev_close */
+#define HV_DISPATCH_DEV_CLOSE                     35
+
+/** hv_dev_pread */
+#define HV_DISPATCH_DEV_PREAD                     36
+
+/** hv_dev_pwrite */
+#define HV_DISPATCH_DEV_PWRITE                    37
+
+/** hv_dev_poll */
+#define HV_DISPATCH_DEV_POLL                      38
+
+/** hv_dev_poll_cancel */
+#define HV_DISPATCH_DEV_POLL_CANCEL               39
+
+/** hv_dev_preada */
+#define HV_DISPATCH_DEV_PREADA                    40
+
+/** hv_dev_pwritea */
+#define HV_DISPATCH_DEV_PWRITEA                   41
+
+/** hv_flush_remote */
+#define HV_DISPATCH_FLUSH_REMOTE                  42
+
+/** hv_console_putc */
+#define HV_DISPATCH_CONSOLE_PUTC                  43
+
+/** hv_inquire_tiles */
+#define HV_DISPATCH_INQUIRE_TILES                 44
+
+/** hv_confstr */
+#define HV_DISPATCH_CONFSTR                       45
+
+/** hv_reexec */
+#define HV_DISPATCH_REEXEC                        46
+
+/** hv_set_command_line */
+#define HV_DISPATCH_SET_COMMAND_LINE              47
+
+/** hv_dev_register_intr_state */
+#define HV_DISPATCH_DEV_REGISTER_INTR_STATE       48
+
+/** hv_enable_intr */
+#define HV_DISPATCH_ENABLE_INTR                   49
+
+/** hv_disable_intr */
+#define HV_DISPATCH_DISABLE_INTR                  50
+
+/** hv_trigger_ipi */
+#define HV_DISPATCH_TRIGGER_IPI                   51
+
+/** hv_store_mapping */
+#define HV_DISPATCH_STORE_MAPPING                 52
+
+/** hv_inquire_realpa */
+#define HV_DISPATCH_INQUIRE_REALPA                53
+
+/** hv_flush_all */
+#define HV_DISPATCH_FLUSH_ALL                     54
+
+/** One more than the largest dispatch value */
+#define _HV_DISPATCH_END                          55
+
+
+#ifndef __ASSEMBLER__
+
+#ifdef __KERNEL__
+#include <asm/types.h>
+typedef u32 __hv32;        /**< 32-bit value */
+typedef u64 __hv64;        /**< 64-bit value */
+#else
+#include <stdint.h>
+typedef uint32_t __hv32;   /**< 32-bit value */
+typedef uint64_t __hv64;   /**< 64-bit value */
+#endif
+
+
+/** Hypervisor physical address. */
+typedef __hv64 HV_PhysAddr;
+
+#if CHIP_VA_WIDTH() > 32
+/** Hypervisor virtual address. */
+typedef __hv64 HV_VirtAddr;
+#else
+/** Hypervisor virtual address. */
+typedef __hv32 HV_VirtAddr;
+#endif /* CHIP_VA_WIDTH() > 32 */
+
+/** Hypervisor ASID. */
+typedef unsigned int HV_ASID;
+
+/** Hypervisor tile location for a memory access
+ * ("location overridden target").
+ */
+typedef unsigned int HV_LOTAR;
+
+/** Hypervisor size of a page. */
+typedef unsigned long HV_PageSize;
+
+/** A page table entry.
+ */
+typedef struct
+{
+  __hv64 val;                /**< Value of PTE */
+} HV_PTE;
+
+/** Hypervisor error code. */
+typedef int HV_Errno;
+
+#endif /* !__ASSEMBLER__ */
+
+#define HV_OK           0    /**< No error */
+#define HV_EINVAL      -801  /**< Invalid argument */
+#define HV_ENODEV      -802  /**< No such device */
+#define HV_ENOENT      -803  /**< No such file or directory */
+#define HV_EBADF       -804  /**< Bad file number */
+#define HV_EFAULT      -805  /**< Bad address */
+#define HV_ERECIP      -806  /**< Bad recipients */
+#define HV_E2BIG       -807  /**< Message too big */
+#define HV_ENOTSUP     -808  /**< Service not supported */
+#define HV_EBUSY       -809  /**< Device busy */
+#define HV_ENOSYS      -810  /**< Invalid syscall */
+#define HV_EPERM       -811  /**< No permission */
+#define HV_ENOTREADY   -812  /**< Device not ready */
+#define HV_EIO         -813  /**< I/O error */
+#define HV_ENOMEM      -814  /**< Out of memory */
+
+#define HV_ERR_MAX     -801  /**< Largest HV error code */
+#define HV_ERR_MIN     -814  /**< Smallest HV error code */
+
+#ifndef __ASSEMBLER__
+
+/** Pass HV_VERSION to hv_init to request this version of the interface. */
+typedef enum { HV_VERSION = _HV_VERSION } HV_VersionNumber;
+
+/** Initializes the hypervisor.
+ *
+ * @param interface_version_number The version of the hypervisor interface
+ * that this program expects, typically HV_VERSION.
+ * @param chip_num Architecture number of the chip the client was built for.
+ * @param chip_rev_num Revision number of the chip the client was built for.
+ */
+void hv_init(HV_VersionNumber interface_version_number,
+             int chip_num, int chip_rev_num);
+
+
+/** Queries we can make for hv_sysconf().
+ *
+ * These numbers are part of the binary API and guaranteed not to change.
+ */
+typedef enum {
+  /** An invalid value; do not use. */
+  _HV_SYSCONF_RESERVED       = 0,
+
+  /** The length of the glue section containing the hv_ procs, in bytes. */
+  HV_SYSCONF_GLUE_SIZE       = 1,
+
+  /** The size of small pages, in bytes. */
+  HV_SYSCONF_PAGE_SIZE_SMALL = 2,
+
+  /** The size of large pages, in bytes. */
+  HV_SYSCONF_PAGE_SIZE_LARGE = 3,
+
+  /** Processor clock speed, in hertz. */
+  HV_SYSCONF_CPU_SPEED       = 4,
+
+  /** Processor temperature, in degrees Kelvin.  The value
+   *  HV_SYSCONF_TEMP_KTOC may be subtracted from this to get degrees
+   *  Celsius.  If that Celsius value is HV_SYSCONF_OVERTEMP, this indicates
+   *  that the temperature has hit an upper limit and is no longer being
+   *  accurately tracked.
+   */
+  HV_SYSCONF_CPU_TEMP        = 5,
+
+  /** Board temperature, in degrees Kelvin.  The value
+   *  HV_SYSCONF_TEMP_KTOC may be subtracted from this to get degrees
+   *  Celsius.  If that Celsius value is HV_SYSCONF_OVERTEMP, this indicates
+   *  that the temperature has hit an upper limit and is no longer being
+   *  accurately tracked.
+   */
+  HV_SYSCONF_BOARD_TEMP      = 6
+
+} HV_SysconfQuery;
+
+/** Offset to subtract from returned Kelvin temperature to get degrees
+    Celsius. */
+#define HV_SYSCONF_TEMP_KTOC 273
+
+/** Pseudo-temperature value indicating that the temperature has
+ *  pegged at its upper limit and is no longer accurate; note that this is
+ *  the value after subtracting HV_SYSCONF_TEMP_KTOC. */
+#define HV_SYSCONF_OVERTEMP 999
+
+/** Query a configuration value from the hypervisor.
+ * @param query Which value is requested (HV_SYSCONF_xxx).
+ * @return The requested value, or -1 the requested value is illegal or
+ *         unavailable.
+ */
+long hv_sysconf(HV_SysconfQuery query);
+
+
+/** Queries we can make for hv_confstr().
+ *
+ * These numbers are part of the binary API and guaranteed not to change.
+ */
+typedef enum {
+  /** An invalid value; do not use. */
+  _HV_CONFSTR_RESERVED        = 0,
+
+  /** Board part number. */
+  HV_CONFSTR_BOARD_PART_NUM   = 1,
+
+  /** Board serial number. */
+  HV_CONFSTR_BOARD_SERIAL_NUM = 2,
+
+  /** Chip serial number. */
+  HV_CONFSTR_CHIP_SERIAL_NUM  = 3,
+
+  /** Board revision level. */
+  HV_CONFSTR_BOARD_REV        = 4,
+
+  /** Hypervisor software version. */
+  HV_CONFSTR_HV_SW_VER        = 5,
+
+  /** The name for this chip model. */
+  HV_CONFSTR_CHIP_MODEL       = 6,
+
+  /** Human-readable board description. */
+  HV_CONFSTR_BOARD_DESC       = 7,
+
+  /** Human-readable description of the hypervisor configuration. */
+  HV_CONFSTR_HV_CONFIG        = 8,
+
+  /** Human-readable version string for the boot image (for instance,
+   *  who built it and when, what configuration file was used). */
+  HV_CONFSTR_HV_CONFIG_VER    = 9,
+
+  /** Mezzanine part number. */
+  HV_CONFSTR_MEZZ_PART_NUM   = 10,
+
+  /** Mezzanine serial number. */
+  HV_CONFSTR_MEZZ_SERIAL_NUM = 11,
+
+  /** Mezzanine revision level. */
+  HV_CONFSTR_MEZZ_REV        = 12,
+
+  /** Human-readable mezzanine description. */
+  HV_CONFSTR_MEZZ_DESC       = 13,
+
+  /** Control path for the onboard network switch. */
+  HV_CONFSTR_SWITCH_CONTROL  = 14,
+
+  /** Chip revision level. */
+  HV_CONFSTR_CHIP_REV        = 15
+
+} HV_ConfstrQuery;
+
+/** Query a configuration string from the hypervisor.
+ *
+ * @param query Identifier for the specific string to be retrieved
+ *        (HV_CONFSTR_xxx).
+ * @param buf Buffer in which to place the string.
+ * @param len Length of the buffer.
+ * @return If query is valid, then the length of the corresponding string,
+ *        including the trailing null; if this is greater than len, the string
+ *        was truncated.  If query is invalid, HV_EINVAL.  If the specified
+ *        buffer is not writable by the client, HV_EFAULT.
+ */
+int hv_confstr(HV_ConfstrQuery query, HV_VirtAddr buf, int len);
+
+/** State object used to enable and disable one-shot and level-sensitive
+ *  interrupts. */
+typedef struct
+{
+#if CHIP_VA_WIDTH() > 32
+  __hv64 opaque[2]; /**< No user-serviceable parts inside */
+#else
+  __hv32 opaque[2]; /**< No user-serviceable parts inside */
+#endif
+}
+HV_IntrState;
+
+/** A set of interrupts. */
+typedef __hv32 HV_IntrMask;
+
+/** Tile coordinate */
+typedef struct
+{
+  /** X coordinate, relative to supervisor's top-left coordinate */
+  int x;
+
+  /** Y coordinate, relative to supervisor's top-left coordinate */
+  int y;
+} HV_Coord;
+
+/** The low interrupt numbers are reserved for use by the client in
+ *  delivering IPIs.  Any interrupt numbers higher than this value are
+ *  reserved for use by HV device drivers. */
+#define HV_MAX_IPI_INTERRUPT 7
+
+/** Register an interrupt state object.  This object is used to enable and
+ *  disable one-shot and level-sensitive interrupts.  Once the state is
+ *  registered, the client must not read or write the state object; doing
+ *  so will cause undefined results.
+ *
+ * @param intr_state Pointer to interrupt state object.
+ * @return HV_OK on success, or a hypervisor error code.
+ */
+HV_Errno hv_dev_register_intr_state(HV_IntrState* intr_state);
+
+/** Enable a set of one-shot and level-sensitive interrupts.
+ *
+ * @param intr_state Pointer to interrupt state object.
+ * @param enab_mask Bitmap of interrupts to enable.
+ */
+void hv_enable_intr(HV_IntrState* intr_state, HV_IntrMask enab_mask);
+
+/** Disable a set of one-shot and level-sensitive interrupts.
+ *
+ * @param intr_state Pointer to interrupt state object.
+ * @param disab_mask Bitmap of interrupts to disable.
+ */
+void hv_disable_intr(HV_IntrState* intr_state, HV_IntrMask disab_mask);
+
+/** Trigger a one-shot interrupt on some tile
+ *
+ * @param tile Which tile to interrupt.
+ * @param interrupt Interrupt number to trigger; must be between 0 and
+ *        HV_MAX_IPI_INTERRUPT.
+ * @return HV_OK on success, or a hypervisor error code.
+ */
+HV_Errno hv_trigger_ipi(HV_Coord tile, int interrupt);
+
+/** Store memory mapping in debug memory so that external debugger can read it.
+ * A maximum of 16 entries can be stored.
+ *
+ * @param va VA of memory that is mapped.
+ * @param len Length of mapped memory.
+ * @param pa PA of memory that is mapped.
+ * @return 0 on success, -1 if the maximum number of mappings is exceeded.
+ */
+int hv_store_mapping(HV_VirtAddr va, unsigned int len, HV_PhysAddr pa);
+
+/** Given a client PA and a length, return its real (HV) PA.
+ *
+ * @param cpa Client physical address.
+ * @param len Length of mapped memory.
+ * @return physical address, or -1 if cpa or len is not valid.
+ */
+HV_PhysAddr hv_inquire_realpa(HV_PhysAddr cpa, unsigned int len);
+
+/** RTC return flag for no RTC chip present.
+ */
+#define HV_RTC_NO_CHIP     0x1
+
+/** RTC return flag for low-voltage condition, indicating that battery had
+ * died and time read is unreliable.
+ */
+#define HV_RTC_LOW_VOLTAGE 0x2
+
+/** Date/Time of day */
+typedef struct {
+#if CHIP_WORD_SIZE() > 32
+  __hv64 tm_sec;   /**< Seconds, 0-59 */
+  __hv64 tm_min;   /**< Minutes, 0-59 */
+  __hv64 tm_hour;  /**< Hours, 0-23 */
+  __hv64 tm_mday;  /**< Day of month, 0-30 */
+  __hv64 tm_mon;   /**< Month, 0-11 */
+  __hv64 tm_year;  /**< Years since 1900, 0-199 */
+  __hv64 flags;    /**< Return flags, 0 if no error */
+#else
+  __hv32 tm_sec;   /**< Seconds, 0-59 */
+  __hv32 tm_min;   /**< Minutes, 0-59 */
+  __hv32 tm_hour;  /**< Hours, 0-23 */
+  __hv32 tm_mday;  /**< Day of month, 0-30 */
+  __hv32 tm_mon;   /**< Month, 0-11 */
+  __hv32 tm_year;  /**< Years since 1900, 0-199 */
+  __hv32 flags;    /**< Return flags, 0 if no error */
+#endif
+} HV_RTCTime;
+
+/** Read the current time-of-day clock.
+ * @return HV_RTCTime of current time (GMT).
+ */
+HV_RTCTime hv_get_rtc(void);
+
+
+/** Set the current time-of-day clock.
+ * @param time time to reset time-of-day to (GMT).
+ */
+void hv_set_rtc(HV_RTCTime time);
+
+/** Installs a context, comprising a page table and other attributes.
+ *
+ *  Once this service completes, page_table will be used to translate
+ *  subsequent virtual address references to physical memory.
+ *
+ *  Installing a context does not cause an implicit TLB flush.  Before
+ *  reusing an ASID value for a different address space, the client is
+ *  expected to flush old references from the TLB with hv_flush_asid().
+ *  (Alternately, hv_flush_all() may be used to flush many ASIDs at once.)
+ *  After invalidating a page table entry, changing its attributes, or
+ *  changing its target CPA, the client is expected to flush old references
+ *  from the TLB with hv_flush_page() or hv_flush_pages(). Making a
+ *  previously invalid page valid does not require a flush.
+ *
+ *  Specifying an invalid ASID, or an invalid CPA (client physical address)
+ *  (either as page_table_pointer, or within the referenced table),
+ *  or another page table data item documented as above as illegal may
+ *  lead to client termination; since the validation of the table is
+ *  done as needed, this may happen before the service returns, or at
+ *  some later time, or never, depending upon the client's pattern of
+ *  memory references.  Page table entries which supply translations for
+ *  invalid virtual addresses may result in client termination, or may
+ *  be silently ignored.  "Invalid" in this context means a value which
+ *  was not provided to the client via the appropriate hv_inquire_* routine.
+ *
+ *  To support changing the instruction VAs at the same time as
+ *  installing the new page table, this call explicitly supports
+ *  setting the "lr" register to a different address and then jumping
+ *  directly to the hv_install_context() routine.  In this case, the
+ *  new page table does not need to contain any mapping for the
+ *  hv_install_context address itself.
+ *
+ * @param page_table Root of the page table.
+ * @param access PTE providing info on how to read the page table.  This
+ *   value must be consistent between multiple tiles sharing a page table,
+ *   and must also be consistent with any virtual mappings the client
+ *   may be using to access the page table.
+ * @param asid HV_ASID the page table is to be used for.
+ * @param flags Context flags, denoting attributes or privileges of the
+ *   current context (HV_CTX_xxx).
+ * @return Zero on success, or a hypervisor error code on failure.
+ */
+int hv_install_context(HV_PhysAddr page_table, HV_PTE access, HV_ASID asid,
+                       __hv32 flags);
+
+#endif /* !__ASSEMBLER__ */
+
+#define HV_CTX_DIRECTIO     0x1   /**< Direct I/O requests are accepted from
+                                       PL0. */
+
+#ifndef __ASSEMBLER__
+
+/** Value returned from hv_inquire_context(). */
+typedef struct
+{
+  /** Physical address of page table */
+  HV_PhysAddr page_table;
+
+  /** PTE which defines access method for top of page table */
+  HV_PTE access;
+
+  /** ASID associated with this page table */
+  HV_ASID asid;
+
+  /** Context flags */
+  __hv32 flags;
+} HV_Context;
+
+/** Retrieve information about the currently installed context.
+ * @return The data passed to the last successful hv_install_context call.
+ */
+HV_Context hv_inquire_context(void);
+
+
+/** Flushes all translations associated with the named address space
+ *  identifier from the TLB and any other hypervisor data structures.
+ *  Translations installed with the "global" bit are not flushed.
+ *
+ *  Specifying an invalid ASID may lead to client termination.  "Invalid"
+ *  in this context means a value which was not provided to the client
+ *  via <tt>hv_inquire_asid()</tt>.
+ *
+ * @param asid HV_ASID whose entries are to be flushed.
+ * @return Zero on success, or a hypervisor error code on failure.
+*/
+int hv_flush_asid(HV_ASID asid);
+
+
+/** Flushes all translations associated with the named virtual address
+ *  and page size from the TLB and other hypervisor data structures. Only
+ *  pages visible to the current ASID are affected; note that this includes
+ *  global pages in addition to pages specific to the current ASID.
+ *
+ *  The supplied VA need not be aligned; it may be anywhere in the
+ *  subject page.
+ *
+ *  Specifying an invalid virtual address may lead to client termination,
+ *  or may silently succeed.  "Invalid" in this context means a value
+ *  which was not provided to the client via hv_inquire_virtual.
+ *
+ * @param address Address of the page to flush.
+ * @param page_size Size of pages to assume.
+ * @return Zero on success, or a hypervisor error code on failure.
+ */
+int hv_flush_page(HV_VirtAddr address, HV_PageSize page_size);
+
+
+/** Flushes all translations associated with the named virtual address range
+ *  and page size from the TLB and other hypervisor data structures. Only
+ *  pages visible to the current ASID are affected; note that this includes
+ *  global pages in addition to pages specific to the current ASID.
+ *
+ *  The supplied VA need not be aligned; it may be anywhere in the
+ *  subject page.
+ *
+ *  Specifying an invalid virtual address may lead to client termination,
+ *  or may silently succeed.  "Invalid" in this context means a value
+ *  which was not provided to the client via hv_inquire_virtual.
+ *
+ * @param start Address to flush.
+ * @param page_size Size of pages to assume.
+ * @param size The number of bytes to flush. Any page in the range
+ *        [start, start + size) will be flushed from the TLB.
+ * @return Zero on success, or a hypervisor error code on failure.
+ */
+int hv_flush_pages(HV_VirtAddr start, HV_PageSize page_size,
+                   unsigned long size);
+
+
+/** Flushes all non-global translations (if preserve_global is true),
+ *  or absolutely all translations (if preserve_global is false).
+ *
+ * @param preserve_global Non-zero if we want to preserve "global" mappings.
+ * @return Zero on success, or a hypervisor error code on failure.
+*/
+int hv_flush_all(int preserve_global);
+
+
+/** Restart machine with optional restart command and optional args.
+ * @param cmd Const pointer to command to restart with, or NULL
+ * @param args Const pointer to argument string to restart with, or NULL
+ */
+void hv_restart(HV_VirtAddr cmd, HV_VirtAddr args);
+
+
+/** Halt machine. */
+void hv_halt(void);
+
+
+/** Power off machine. */
+void hv_power_off(void);
+
+
+/** Re-enter virtual-is-physical memory translation mode and restart
+ *  execution at a given address.
+ * @param entry Client physical address at which to begin execution.
+ * @return A hypervisor error code on failure; if the operation is
+ *         successful the call does not return.
+ */
+int hv_reexec(HV_PhysAddr entry);
+
+
+/** Chip topology */
+typedef struct
+{
+  /** Relative coordinates of the querying tile */
+  HV_Coord coord;
+
+  /** Width of the querying supervisor's tile rectangle. */
+  int width;
+
+  /** Height of the querying supervisor's tile rectangle. */
+  int height;
+
+} HV_Topology;
+
+/** Returns information about the tile coordinate system.
+ *
+ * Each supervisor is given a rectangle of tiles it potentially controls.
+ * These tiles are labeled using a relative coordinate system with (0,0) as
+ * the upper left tile regardless of their physical location on the chip.
+ *
+ * This call returns both the size of that rectangle and the position
+ * within that rectangle of the querying tile.
+ *
+ * Not all tiles within that rectangle may be available to the supervisor;
+ * to get the precise set of available tiles, you must also call
+ * hv_inquire_tiles(HV_INQ_TILES_AVAIL, ...).
+ **/
+HV_Topology hv_inquire_topology(void);
+
+/** Sets of tiles we can retrieve with hv_inquire_tiles().
+ *
+ * These numbers are part of the binary API and guaranteed not to change.
+ */
+typedef enum {
+  /** An invalid value; do not use. */
+  _HV_INQ_TILES_RESERVED       = 0,
+
+  /** All available tiles within the supervisor's tile rectangle. */
+  HV_INQ_TILES_AVAIL           = 1,
+
+  /** The set of tiles used for hash-for-home caching. */
+  HV_INQ_TILES_HFH_CACHE       = 2,
+
+  /** The set of tiles that can be legally used as a LOTAR for a PTE. */
+  HV_INQ_TILES_LOTAR           = 3
+} HV_InqTileSet;
+
+/** Returns specific information about various sets of tiles within the
+ *  supervisor's tile rectangle.
+ *
+ * @param set Which set of tiles to retrieve.
+ * @param cpumask Pointer to a returned bitmask (in row-major order,
+ *        supervisor-relative) of tiles.  The low bit of the first word
+ *        corresponds to the tile at the upper left-hand corner of the
+ *        supervisor's rectangle.  In order for the supervisor to know the
+ *        buffer length to supply, it should first call hv_inquire_topology.
+ * @param length Number of bytes available for the returned bitmask.
+ **/
+HV_Errno hv_inquire_tiles(HV_InqTileSet set, HV_VirtAddr cpumask, int length);
+
+
+/** An identifier for a memory controller. Multiple memory controllers
+ * may be connected to one chip, and this uniquely identifies each one.
+ */
+typedef int HV_MemoryController;
+
+/** A range of physical memory. */
+typedef struct
+{
+  HV_PhysAddr start;   /**< Starting address. */
+  __hv64 size;         /**< Size in bytes. */
+  HV_MemoryController controller;  /**< Which memory controller owns this. */
+} HV_PhysAddrRange;
+
+/** Returns information about a range of physical memory.
+ *
+ * hv_inquire_physical() returns one of the ranges of client
+ * physical addresses which are available to this client.
+ *
+ * The first range is retrieved by specifying an idx of 0, and
+ * successive ranges are returned with subsequent idx values.  Ranges
+ * are ordered by increasing start address (i.e., as idx increases,
+ * so does start), do not overlap, and do not touch (i.e., the
+ * available memory is described with the fewest possible ranges).
+ *
+ * If an out-of-range idx value is specified, the returned size will be zero.
+ * A client can count the number of ranges by increasing idx until the
+ * returned size is zero. There will always be at least one valid range.
+ *
+ * Some clients might not be prepared to deal with more than one
+ * physical address range; they still ought to call this routine and
+ * issue a warning message if they're given more than one range, on the
+ * theory that whoever configured the hypervisor to provide that memory
+ * should know that it's being wasted.
+ */
+HV_PhysAddrRange hv_inquire_physical(int idx);
+
+
+/** Memory controller information. */
+typedef struct
+{
+  HV_Coord coord;   /**< Relative tile coordinates of the port used by a
+                         specified tile to communicate with this controller. */
+  __hv64 speed;     /**< Speed of this controller in bytes per second. */
+} HV_MemoryControllerInfo;
+
+/** Returns information about a particular memory controller.
+ *
+ *  hv_inquire_memory_controller(coord,idx) returns information about a
+ *  particular controller.  Two pieces of information are returned:
+ *  - The relative coordinates of the port on the controller that the specified
+ *    tile would use to contact it.  The relative coordinates may lie
+ *    outside the supervisor's rectangle, i.e. the controller may not
+ *    be attached to a node managed by the querying node's supervisor.
+ *    In particular note that x or y may be negative.
+ *  - The speed of the memory controller.  (This is a not-to-exceed value
+ *    based on the raw hardware data rate, and may not be achievable in
+ *    practice; it is provided to give clients information on the relative
+ *    performance of the available controllers.)
+ *
+ *  Clients should avoid calling this interface with invalid values.
+ *  A client who does may be terminated.
+ * @param coord Tile for which to calculate the relative port position.
+ * @param controller Index of the controller; identical to value returned
+ *        from other routines like hv_inquire_physical.
+ * @return Information about the controller.
+ */
+HV_MemoryControllerInfo hv_inquire_memory_controller(HV_Coord coord,
+                                                     int controller);
+
+
+/** A range of virtual memory. */
+typedef struct
+{
+  HV_VirtAddr start;   /**< Starting address. */
+  __hv64 size;         /**< Size in bytes. */
+} HV_VirtAddrRange;
+
+/** Returns information about a range of virtual memory.
+ *
+ * hv_inquire_virtual() returns one of the ranges of client
+ * virtual addresses which are available to this client.
+ *
+ * The first range is retrieved by specifying an idx of 0, and
+ * successive ranges are returned with subsequent idx values.  Ranges
+ * are ordered by increasing start address (i.e., as idx increases,
+ * so does start), do not overlap, and do not touch (i.e., the
+ * available memory is described with the fewest possible ranges).
+ *
+ * If an out-of-range idx value is specified, the returned size will be zero.
+ * A client can count the number of ranges by increasing idx until the
+ * returned size is zero. There will always be at least one valid range.
+ *
+ * Some clients may well have various virtual addresses hardwired
+ * into themselves; for instance, their instruction stream may
+ * have been compiled expecting to live at a particular address.
+ * Such clients should use this interface to verify they've been
+ * given the virtual address space they expect, and issue a (potentially
+ * fatal) warning message otherwise.
+ *
+ * Note that the returned size is a __hv64, not a __hv32, so it is
+ * possible to express a single range spanning the entire 32-bit
+ * address space.
+ */
+HV_VirtAddrRange hv_inquire_virtual(int idx);
+
+
+/** A range of ASID values. */
+typedef struct
+{
+  HV_ASID start;        /**< First ASID in the range. */
+  unsigned int size;    /**< Number of ASIDs. Zero for an invalid range. */
+} HV_ASIDRange;
+
+/** Returns information about a range of ASIDs.
+ *
+ * hv_inquire_asid() returns one of the ranges of address
+ * space identifiers which are available to this client.
+ *
+ * The first range is retrieved by specifying an idx of 0, and
+ * successive ranges are returned with subsequent idx values.  Ranges
+ * are ordered by increasing start value (i.e., as idx increases,
+ * so does start), do not overlap, and do not touch (i.e., the
+ * available ASIDs are described with the fewest possible ranges).
+ *
+ * If an out-of-range idx value is specified, the returned size will be zero.
+ * A client can count the number of ranges by increasing idx until the
+ * returned size is zero. There will always be at least one valid range.
+ */
+HV_ASIDRange hv_inquire_asid(int idx);
+
+
+/** Waits for at least the specified number of nanoseconds then returns.
+ *
+ * @param nanosecs The number of nanoseconds to sleep.
+ */
+void hv_nanosleep(int nanosecs);
+
+
+/** Reads a character from the console without blocking.
+ *
+ * @return A value from 0-255 indicates the value successfully read.
+ * A negative value means no value was ready.
+ */
+int hv_console_read_if_ready(void);
+
+
+/** Writes a character to the console, blocking if the console is busy.
+ *
+ *  This call cannot fail. If the console is broken for some reason,
+ *  output will simply vanish.
+ * @param byte Character to write.
+ */
+void hv_console_putc(int byte);
+
+
+/** Writes a string to the console, blocking if the console is busy.
+ * @param bytes Pointer to characters to write.
+ * @param len Number of characters to write.
+ * @return Number of characters written, or HV_EFAULT if the buffer is invalid.
+ */
+int hv_console_write(HV_VirtAddr bytes, int len);
+
+
+/** Dispatch the next interrupt from the client downcall mechanism.
+ *
+ *  The hypervisor uses downcalls to notify the client of asynchronous
+ *  events.  Some of these events are hypervisor-created (like incoming
+ *  messages).  Some are regular interrupts which initially occur in
+ *  the hypervisor, and are normally handled directly by the client;
+ *  when these occur in a client's interrupt critical section, they must
+ *  be delivered through the downcall mechanism.
+ *
+ *  A downcall is initially delivered to the client as an INTCTRL_1
+ *  interrupt.  Upon entry to the INTCTRL_1 vector, the client must
+ *  immediately invoke the hv_downcall_dispatch service.  This service
+ *  will not return; instead it will cause one of the client's actual
+ *  downcall-handling interrupt vectors to be entered.  The EX_CONTEXT
+ *  registers in the client will be set so that when the client irets,
+ *  it will return to the code which was interrupted by the INTCTRL_1
+ *  interrupt.
+ *
+ *  Any saving of registers should be done by the actual handling
+ *  vectors; no registers should be changed by the INTCTRL_1 handler.
+ *  In particular, the client should not use a jal instruction to invoke
+ *  the hv_downcall_dispatch service, as that would overwrite the client's
+ *  lr register.  Note that the hv_downcall_dispatch service may overwrite
+ *  one or more of the client's system save registers.
+ *
+ *  The client must not modify the INTCTRL_1_STATUS SPR.  The hypervisor
+ *  will set this register to cause a downcall to happen, and will clear
+ *  it when no further downcalls are pending.
+ *
+ *  When a downcall vector is entered, the INTCTRL_1 interrupt will be
+ *  masked.  When the client is done processing a downcall, and is ready
+ *  to accept another, it must unmask this interrupt; if more downcalls
+ *  are pending, this will cause the INTCTRL_1 vector to be reentered.
+ *  Currently the following interrupt vectors can be entered through a
+ *  downcall:
+ *
+ *  INT_MESSAGE_RCV_DWNCL   (hypervisor message available)
+ *  INT_DMATLB_MISS_DWNCL   (DMA TLB miss)
+ *  INT_SNITLB_MISS_DWNCL   (SNI TLB miss)
+ *  INT_DMATLB_ACCESS_DWNCL (DMA TLB access violation)
+ */
+void hv_downcall_dispatch(void);
+
+#endif /* !__ASSEMBLER__ */
+
+/** We use actual interrupt vectors which never occur (they're only there
+ *  to allow setting MPLs for related SPRs) for our downcall vectors.
+ */
+/** Message receive downcall interrupt vector */
+#define INT_MESSAGE_RCV_DWNCL    INT_BOOT_ACCESS
+/** DMA TLB miss downcall interrupt vector */
+#define INT_DMATLB_MISS_DWNCL    INT_DMA_ASID
+/** Static nework processor instruction TLB miss interrupt vector */
+#define INT_SNITLB_MISS_DWNCL    INT_SNI_ASID
+/** DMA TLB access violation downcall interrupt vector */
+#define INT_DMATLB_ACCESS_DWNCL  INT_DMA_CPL
+/** Device interrupt downcall interrupt vector */
+#define INT_DEV_INTR_DWNCL       INT_WORLD_ACCESS
+
+#ifndef __ASSEMBLER__
+
+/** Requests the inode for a specific full pathname.
+ *
+ * Performs a lookup in the hypervisor filesystem for a given filename.
+ * Multiple calls with the same filename will always return the same inode.
+ * If there is no such filename, HV_ENOENT is returned.
+ * A bad filename pointer may result in HV_EFAULT instead.
+ *
+ * @param filename Constant pointer to name of requested file
+ * @return Inode of requested file
+ */
+int hv_fs_findfile(HV_VirtAddr filename);
+
+
+/** Data returned from an fstat request.
+ * Note that this structure should be no more than 40 bytes in size so
+ * that it can always be returned completely in registers.
+ */
+typedef struct
+{
+  int size;             /**< Size of file (or HV_Errno on error) */
+  unsigned int flags;   /**< Flags (see HV_FS_FSTAT_FLAGS) */
+} HV_FS_StatInfo;
+
+/** Bitmask flags for fstat request */
+typedef enum
+{
+  HV_FS_ISDIR    = 0x0001   /**< Is the entry a directory? */
+} HV_FS_FSTAT_FLAGS;
+
+/** Get stat information on a given file inode.
+ *
+ * Return information on the file with the given inode.
+ *
+ * IF the HV_FS_ISDIR bit is set, the "file" is a directory.  Reading
+ * it will return NUL-separated filenames (no directory part) relative
+ * to the path to the inode of the directory "file".  These can be
+ * appended to the path to the directory "file" after a forward slash
+ * to create additional filenames.  Note that it is not required
+ * that all valid paths be decomposable into valid parent directories;
+ * a filesystem may validly have just a few files, none of which have
+ * HV_FS_ISDIR set.  However, if clients may wish to enumerate the
+ * files in the filesystem, it is recommended to include all the
+ * appropriate parent directory "files" to give a consistent view.
+ *
+ * An invalid file inode will cause an HV_EBADF error to be returned.
+ *
+ * @param inode The inode number of the query
+ * @return An HV_FS_StatInfo structure
+ */
+HV_FS_StatInfo hv_fs_fstat(int inode);
+
+
+/** Read data from a specific hypervisor file.
+ * On error, may return HV_EBADF for a bad inode or HV_EFAULT for a bad buf.
+ * Reads near the end of the file will return fewer bytes than requested.
+ * Reads at or beyond the end of a file will return zero.
+ *
+ * @param inode the hypervisor file to read
+ * @param buf the buffer to read data into
+ * @param length the number of bytes of data to read
+ * @param offset the offset into the file to read the data from
+ * @return number of bytes successfully read, or an HV_Errno code
+ */
+int hv_fs_pread(int inode, HV_VirtAddr buf, int length, int offset);
+
+
+/** Read a 64-bit word from the specified physical address.
+ * The address must be 8-byte aligned.
+ * Specifying an invalid physical address will lead to client termination.
+ * @param addr The physical address to read
+ * @param access The PTE describing how to read the memory
+ * @return The 64-bit value read from the given address
+ */
+unsigned long long hv_physaddr_read64(HV_PhysAddr addr, HV_PTE access);
+
+
+/** Write a 64-bit word to the specified physical address.
+ * The address must be 8-byte aligned.
+ * Specifying an invalid physical address will lead to client termination.
+ * @param addr The physical address to write
+ * @param access The PTE that says how to write the memory
+ * @param val The 64-bit value to write to the given address
+ */
+void hv_physaddr_write64(HV_PhysAddr addr, HV_PTE access,
+                         unsigned long long val);
+
+
+/** Get the value of the command-line for the supervisor, if any.
+ * This will not include the filename of the booted supervisor, but may
+ * include configured-in boot arguments or the hv_restart() arguments.
+ * If the buffer is not long enough the hypervisor will NUL the first
+ * character of the buffer but not write any other data.
+ * @param buf The virtual address to write the command-line string to.
+ * @param length The length of buf, in characters.
+ * @return The actual length of the command line, including the trailing NUL
+ *         (may be larger than "length").
+ */
+int hv_get_command_line(HV_VirtAddr buf, int length);
+
+
+/** Set a new value for the command-line for the supervisor, which will
+ *  be returned from subsequent invocations of hv_get_command_line() on
+ *  this tile.
+ * @param buf The virtual address to read the command-line string from.
+ * @param length The length of buf, in characters; must be no more than
+ *        HV_COMMAND_LINE_LEN.
+ * @return Zero if successful, or a hypervisor error code.
+ */
+HV_Errno hv_set_command_line(HV_VirtAddr buf, int length);
+
+/** Maximum size of a command line passed to hv_set_command_line(); note
+ *  that a line returned from hv_get_command_line() could be larger than
+ *  this.*/
+#define HV_COMMAND_LINE_LEN  256
+
+/** Tell the hypervisor how to cache non-priority pages
+ * (its own as well as pages explicitly represented in page tables).
+ * Normally these will be represented as red/black pages, but
+ * when the supervisor starts to allocate "priority" pages in the PTE
+ * the hypervisor will need to start marking those pages as (e.g.) "red"
+ * and non-priority pages as either "black" (if they cache-alias
+ * with the existing priority pages) or "red/black" (if they don't).
+ * The bitmask provides information on which parts of the cache
+ * have been used for pinned pages so far on this tile; if (1 << N)
+ * appears in the bitmask, that indicates that a page has been marked
+ * "priority" whose PFN equals N, mod 8.
+ * @param bitmask A bitmap of priority page set values
+ */
+void hv_set_caching(unsigned int bitmask);
+
+
+/** Zero out a specified number of pages.
+ * The va and size must both be multiples of 4096.
+ * Caches are bypassed and memory is directly set to zero.
+ * This API is implemented only in the magic hypervisor and is intended
+ * to provide a performance boost to the minimal supervisor by
+ * giving it a fast way to zero memory pages when allocating them.
+ * @param va Virtual address where the page has been mapped
+ * @param size Number of bytes (must be a page size multiple)
+ */
+void hv_bzero_page(HV_VirtAddr va, unsigned int size);
+
+
+/** State object for the hypervisor messaging subsystem. */
+typedef struct
+{
+#if CHIP_VA_WIDTH() > 32
+  __hv64 opaque[2]; /**< No user-serviceable parts inside */
+#else
+  __hv32 opaque[2]; /**< No user-serviceable parts inside */
+#endif
+}
+HV_MsgState;
+
+/** Register to receive incoming messages.
+ *
+ *  This routine configures the current tile so that it can receive
+ *  incoming messages.  It must be called before the client can receive
+ *  messages with the hv_receive_message routine, and must be called on
+ *  each tile which will receive messages.
+ *
+ *  msgstate is the virtual address of a state object of type HV_MsgState.
+ *  Once the state is registered, the client must not read or write the
+ *  state object; doing so will cause undefined results.
+ *
+ *  If this routine is called with msgstate set to 0, the client's message
+ *  state will be freed and it will no longer be able to receive messages.
+ *  Note that this may cause the loss of any as-yet-undelivered messages
+ *  for the client.
+ *
+ *  If another client attempts to send a message to a client which has
+ *  not yet called hv_register_message_state, or which has freed its
+ *  message state, the message will not be delivered, as if the client
+ *  had insufficient buffering.
+ *
+ *  This routine returns HV_OK if the registration was successful, and
+ *  HV_EINVAL if the supplied state object is unsuitable.  Note that some
+ *  errors may not be detected during this routine, but might be detected
+ *  during a subsequent message delivery.
+ * @param msgstate State object.
+ **/
+HV_Errno hv_register_message_state(HV_MsgState* msgstate);
+
+/** Possible message recipient states. */
+typedef enum
+{
+  HV_TO_BE_SENT,    /**< Not sent (not attempted, or recipient not ready) */
+  HV_SENT,          /**< Successfully sent */
+  HV_BAD_RECIP      /**< Bad recipient coordinates (permanent error) */
+} HV_Recip_State;
+
+/** Message recipient. */
+typedef struct
+{
+  /** X coordinate, relative to supervisor's top-left coordinate */
+  unsigned int x:11;
+
+  /** Y coordinate, relative to supervisor's top-left coordinate */
+  unsigned int y:11;
+
+  /** Status of this recipient */
+  HV_Recip_State state:10;
+} HV_Recipient;
+
+/** Send a message to a set of recipients.
+ *
+ *  This routine sends a message to a set of recipients.
+ *
+ *  recips is an array of HV_Recipient structures.  Each specifies a tile,
+ *  and a message state; initially, it is expected that the state will
+ *  be set to HV_TO_BE_SENT.  nrecip specifies the number of recipients
+ *  in the recips array.
+ *
+ *  For each recipient whose state is HV_TO_BE_SENT, the hypervisor attempts
+ *  to send that tile the specified message.  In order to successfully
+ *  receive the message, the receiver must be a valid tile to which the
+ *  sender has access, must not be the sending tile itself, and must have
+ *  sufficient free buffer space.  (The hypervisor guarantees that each
+ *  tile which has called hv_register_message_state() will be able to
+ *  buffer one message from every other tile which can legally send to it;
+ *  more space may be provided but is not guaranteed.)  If an invalid tile
+ *  is specified, the recipient's state is set to HV_BAD_RECIP; this is a
+ *  permanent delivery error.  If the message is successfully delivered
+ *  to the recipient's buffer, the recipient's state is set to HV_SENT.
+ *  Otherwise, the recipient's state is unchanged.  Message delivery is
+ *  synchronous; all attempts to send messages are completed before this
+ *  routine returns.
+ *
+ *  If no permanent delivery errors were encountered, the routine returns
+ *  the number of messages successfully sent: that is, the number of
+ *  recipients whose states changed from HV_TO_BE_SENT to HV_SENT during
+ *  this operation.  If any permanent delivery errors were encountered,
+ *  the routine returns HV_ERECIP.  In the event of permanent delivery
+ *  errors, it may be the case that delivery was not attempted to all
+ *  recipients; if any messages were succesfully delivered, however,
+ *  recipients' state values will be updated appropriately.
+ *
+ *  It is explicitly legal to specify a recipient structure whose state
+ *  is not HV_TO_BE_SENT; such a recipient is ignored.  One suggested way
+ *  of using hv_send_message to send a message to multiple tiles is to set
+ *  up a list of recipients, and then call the routine repeatedly with the
+ *  same list, each time accumulating the number of messages successfully
+ *  sent, until all messages are sent, a permanent error is encountered,
+ *  or the desired number of attempts have been made.  When used in this
+ *  way, the routine will deliver each message no more than once to each
+ *  recipient.
+ *
+ *  Note that a message being successfully delivered to the recipient's
+ *  buffer space does not guarantee that it is received by the recipient,
+ *  either immediately or at any time in the future; the recipient might
+ *  never call hv_receive_message, or could register a different state
+ *  buffer, losing the message.
+ *
+ *  Specifiying the same recipient more than once in the recipient list
+ *  is an error, which will not result in an error return but which may
+ *  or may not result in more than one message being delivered to the
+ *  recipient tile.
+ *
+ *  buf and buflen specify the message to be sent.  buf is a virtual address
+ *  which must be currently mapped in the client's page table; if not, the
+ *  routine returns HV_EFAULT.  buflen must be greater than zero and less
+ *  than or equal to HV_MAX_MESSAGE_SIZE, and nrecip must be less than the
+ *  number of tiles to which the sender has access; if not, the routine
+ *  returns HV_EINVAL.
+ * @param recips List of recipients.
+ * @param nrecip Number of recipients.
+ * @param buf Address of message data.
+ * @param buflen Length of message data.
+ **/
+int hv_send_message(HV_Recipient *recips, int nrecip,
+                    HV_VirtAddr buf, int buflen);
+
+/** Maximum hypervisor message size, in bytes */
+#define HV_MAX_MESSAGE_SIZE 28
+
+
+/** Return value from hv_receive_message() */
+typedef struct
+{
+  int msglen;     /**< Message length in bytes, or an error code */
+  __hv32 source;  /**< Code identifying message sender (HV_MSG_xxx) */
+} HV_RcvMsgInfo;
+
+#define HV_MSG_TILE 0x0         /**< Message source is another tile */
+#define HV_MSG_INTR 0x1         /**< Message source is a driver interrupt */
+
+/** Receive a message.
+ *
+ * This routine retrieves a message from the client's incoming message
+ * buffer.
+ *
+ * Multiple messages sent from a particular sending tile to a particular
+ * receiving tile are received in the order that they were sent; however,
+ * no ordering is guaranteed between messages sent by different tiles.
+ *
+ * Whenever the a client's message buffer is empty, the first message
+ * subsequently received will cause the client's MESSAGE_RCV_DWNCL
+ * interrupt vector to be invoked through the interrupt downcall mechanism
+ * (see the description of the hv_downcall_dispatch() routine for details
+ * on downcalls).
+ *
+ * Another message-available downcall will not occur until a call to
+ * this routine is made when the message buffer is empty, and a message
+ * subsequently arrives.  Note that such a downcall could occur while
+ * this routine is executing.  If the calling code does not wish this
+ * to happen, it is recommended that this routine be called with the
+ * INTCTRL_1 interrupt masked, or inside an interrupt critical section.
+ *
+ * msgstate is the value previously passed to hv_register_message_state().
+ * buf is the virtual address of the buffer into which the message will
+ * be written; buflen is the length of the buffer.
+ *
+ * This routine returns an HV_RcvMsgInfo structure.  The msglen member
+ * of that structure is the length of the message received, zero if no
+ * message is available, or HV_E2BIG if the message is too large for the
+ * specified buffer.  If the message is too large, it is not consumed,
+ * and may be retrieved by a subsequent call to this routine specifying
+ * a sufficiently large buffer.  A buffer which is HV_MAX_MESSAGE_SIZE
+ * bytes long is guaranteed to be able to receive any possible message.
+ *
+ * The source member of the HV_RcvMsgInfo structure describes the sender
+ * of the message.  For messages sent by another client tile via an
+ * hv_send_message() call, this value is HV_MSG_TILE; for messages sent
+ * as a result of a device interrupt, this value is HV_MSG_INTR.
+ */
+
+HV_RcvMsgInfo hv_receive_message(HV_MsgState msgstate, HV_VirtAddr buf,
+                                 int buflen);
+
+
+/** Start remaining tiles owned by this supervisor.  Initially, only one tile
+ *  executes the client program; after it calls this service, the other tiles
+ *  are started.  This allows the initial tile to do one-time configuration
+ *  of shared data structures without having to lock them against simultaneous
+ *  access.
+ */
+void hv_start_all_tiles(void);
+
+
+/** Open a hypervisor device.
+ *
+ *  This service initializes an I/O device and its hypervisor driver software,
+ *  and makes it available for use.  The open operation is per-device per-chip;
+ *  once it has been performed, the device handle returned may be used in other
+ *  device services calls made by any tile.
+ *
+ * @param name Name of the device.  A base device name is just a text string
+ *        (say, "pcie").  If there is more than one instance of a device, the
+ *        base name is followed by a slash and a device number (say, "pcie/0").
+ *        Some devices may support further structure beneath those components;
+ *        most notably, devices which require control operations do so by
+ *        supporting reads and/or writes to a control device whose name
+ *        includes a trailing "/ctl" (say, "pcie/0/ctl").
+ * @param flags Flags (HV_DEV_xxx).
+ * @return A positive integer device handle, or a negative error code.
+ */
+int hv_dev_open(HV_VirtAddr name, __hv32 flags);
+
+
+/** Close a hypervisor device.
+ *
+ *  This service uninitializes an I/O device and its hypervisor driver
+ *  software, and makes it unavailable for use.  The close operation is
+ *  per-device per-chip; once it has been performed, the device is no longer
+ *  available.  Normally there is no need to ever call the close service.
+ *
+ * @param devhdl Device handle of the device to be closed.
+ * @return Zero if the close is successful, otherwise, a negative error code.
+ */
+int hv_dev_close(int devhdl);
+
+
+/** Read data from a hypervisor device synchronously.
+ *
+ *  This service transfers data from a hypervisor device to a memory buffer.
+ *  When the service returns, the data has been written from the memory buffer,
+ *  and the buffer will not be further modified by the driver.
+ *
+ *  No ordering is guaranteed between requests issued from different tiles.
+ *
+ *  Devices may choose to support both the synchronous and asynchronous read
+ *  operations, only one of them, or neither of them.
+ *
+ * @param devhdl Device handle of the device to be read from.
+ * @param flags Flags (HV_DEV_xxx).
+ * @param va Virtual address of the target data buffer.  This buffer must
+ *        be mapped in the currently installed page table; if not, HV_EFAULT
+ *        may be returned.
+ * @param len Number of bytes to be transferred.
+ * @param offset Driver-dependent offset.  For a random-access device, this is
+ *        often a byte offset from the beginning of the device; in other cases,
+ *        like on a control device, it may have a different meaning.
+ * @return A non-negative value if the read was at least partially successful;
+ *         otherwise, a negative error code.  The precise interpretation of
+ *         the return value is driver-dependent, but many drivers will return
+ *         the number of bytes successfully transferred.
+ */
+int hv_dev_pread(int devhdl, __hv32 flags, HV_VirtAddr va, __hv32 len,
+                 __hv64 offset);
+
+#define HV_DEV_NB_EMPTY     0x1   /**< Don't block when no bytes of data can
+                                       be transferred. */
+#define HV_DEV_NB_PARTIAL   0x2   /**< Don't block when some bytes, but not all
+                                       of the requested bytes, can be
+                                       transferred. */
+#define HV_DEV_NOCACHE      0x4   /**< The caller warrants that none of the
+                                       cache lines which might contain data
+                                       from the requested buffer are valid.
+                                       Useful with asynchronous operations
+                                       only. */
+
+#define HV_DEV_ALLFLAGS     (HV_DEV_NB_EMPTY | HV_DEV_NB_PARTIAL | \
+                             HV_DEV_NOCACHE)   /**< All HV_DEV_xxx flags */
+
+/** Write data to a hypervisor device synchronously.
+ *
+ *  This service transfers data from a memory buffer to a hypervisor device.
+ *  When the service returns, the data has been read from the memory buffer,
+ *  and the buffer may be overwritten by the client; the data may not
+ *  necessarily have been conveyed to the actual hardware I/O interface.
+ *
+ *  No ordering is guaranteed between requests issued from different tiles.
+ *
+ *  Devices may choose to support both the synchronous and asynchronous write
+ *  operations, only one of them, or neither of them.
+ *
+ * @param devhdl Device handle of the device to be written to.
+ * @param flags Flags (HV_DEV_xxx).
+ * @param va Virtual address of the source data buffer.  This buffer must
+ *        be mapped in the currently installed page table; if not, HV_EFAULT
+ *        may be returned.
+ * @param len Number of bytes to be transferred.
+ * @param offset Driver-dependent offset.  For a random-access device, this is
+ *        often a byte offset from the beginning of the device; in other cases,
+ *        like on a control device, it may have a different meaning.
+ * @return A non-negative value if the write was at least partially successful;
+ *         otherwise, a negative error code.  The precise interpretation of
+ *         the return value is driver-dependent, but many drivers will return
+ *         the number of bytes successfully transferred.
+ */
+int hv_dev_pwrite(int devhdl, __hv32 flags, HV_VirtAddr va, __hv32 len,
+                  __hv64 offset);
+
+
+/** Interrupt arguments, used in the asynchronous I/O interfaces. */
+#if CHIP_VA_WIDTH() > 32
+typedef __hv64 HV_IntArg;
+#else
+typedef __hv32 HV_IntArg;
+#endif
+
+/** Interrupt messages are delivered via the mechanism as normal messages,
+ *  but have a message source of HV_DEV_INTR.  The message is formatted
+ *  as an HV_IntrMsg structure.
+ */
+
+typedef struct
+{
+  HV_IntArg intarg;  /**< Interrupt argument, passed to the poll/preada/pwritea
+                          services */
+  HV_IntArg intdata; /**< Interrupt-specific interrupt data */
+} HV_IntrMsg;
+
+/** Request an interrupt message when a device condition is satisfied.
+ *
+ *  This service requests that an interrupt message be delivered to the
+ *  requesting tile when a device becomes readable or writable, or when any
+ *  data queued to the device via previous write operations from this tile
+ *  has been actually sent out on the hardware I/O interface.  Devices may
+ *  choose to support any, all, or none of the available conditions.
+ *
+ *  If multiple conditions are specified, only one message will be
+ *  delivered.  If the event mask delivered to that interrupt handler
+ *  indicates that some of the conditions have not yet occurred, the
+ *  client must issue another poll() call if it wishes to wait for those
+ *  conditions.
+ *
+ *  Only one poll may be outstanding per device handle per tile.  If more than
+ *  one tile is polling on the same device and condition, they will all be
+ *  notified when it happens.  Because of this, clients may not assume that
+ *  the condition signaled is necessarily still true when they request a
+ *  subsequent service; for instance, the readable data which caused the
+ *  poll call to interrupt may have been read by another tile in the interim.
+ *
+ *  The notification interrupt message could come directly, or via the
+ *  downcall (intctrl1) method, depending on what the tile is doing
+ *  when the condition is satisfied.  Note that it is possible for the
+ *  requested interrupt to be delivered after this service is called but
+ *  before it returns.
+ *
+ * @param devhdl Device handle of the device to be polled.
+ * @param events Flags denoting the events which will cause the interrupt to
+ *        be delivered (HV_DEVPOLL_xxx).
+ * @param intarg Value which will be delivered as the intarg member of the
+ *        eventual interrupt message; the intdata member will be set to a
+ *        mask of HV_DEVPOLL_xxx values indicating which conditions have been
+ *        satisifed.
+ * @return Zero if the interrupt was successfully scheduled; otherwise, a
+ *         negative error code.
+ */
+int hv_dev_poll(int devhdl, __hv32 events, HV_IntArg intarg);
+
+#define HV_DEVPOLL_READ     0x1   /**< Test device for readability */
+#define HV_DEVPOLL_WRITE    0x2   /**< Test device for writability */
+#define HV_DEVPOLL_FLUSH    0x4   /**< Test device for output drained */
+
+
+/** Cancel a request for an interrupt when a device event occurs.
+ *
+ *  This service requests that no interrupt be delivered when the events
+ *  noted in the last-issued poll() call happen.  Once this service returns,
+ *  the interrupt has been canceled; however, it is possible for the interrupt
+ *  to be delivered after this service is called but before it returns.
+ *
+ * @param devhdl Device handle of the device on which to cancel polling.
+ * @return Zero if the poll was successfully canceled; otherwise, a negative
+ *         error code.
+ */
+int hv_dev_poll_cancel(int devhdl);
+
+
+/** Scatter-gather list for preada/pwritea calls. */
+typedef struct
+#if CHIP_VA_WIDTH() <= 32
+__attribute__ ((packed, aligned(4)))
+#endif
+{
+  HV_PhysAddr pa;  /**< Client physical address of the buffer segment. */
+  HV_PTE pte;      /**< Page table entry describing the caching and location
+                        override characteristics of the buffer segment.  Some
+                        drivers ignore this element and will require that
+                        the NOCACHE flag be set on their requests. */
+  __hv32 len;      /**< Length of the buffer segment. */
+} HV_SGL;
+
+#define HV_SGL_MAXLEN 16  /**< Maximum number of entries in a scatter-gather
+                               list */
+
+/** Read data from a hypervisor device asynchronously.
+ *
+ *  This service transfers data from a hypervisor device to a memory buffer.
+ *  When the service returns, the read has been scheduled.  When the read
+ *  completes, an interrupt message will be delivered, and the buffer will
+ *  not be further modified by the driver.
+ *
+ *  The number of possible outstanding asynchronous requests is defined by
+ *  each driver, but it is recommended that it be at least two requests
+ *  per tile per device.
+ *
+ *  No ordering is guaranteed between synchronous and asynchronous requests,
+ *  even those issued on the same tile.
+ *
+ *  The completion interrupt message could come directly, or via the downcall
+ *  (intctrl1) method, depending on what the tile is doing when the read
+ *  completes.  Interrupts do not coalesce; one is delivered for each
+ *  asynchronous I/O request.  Note that it is possible for the requested
+ *  interrupt to be delivered after this service is called but before it
+ *  returns.
+ *
+ *  Devices may choose to support both the synchronous and asynchronous read
+ *  operations, only one of them, or neither of them.
+ *
+ * @param devhdl Device handle of the device to be read from.
+ * @param flags Flags (HV_DEV_xxx).
+ * @param sgl_len Number of elements in the scatter-gather list.
+ * @param sgl Scatter-gather list describing the memory to which data will be
+ *        written.
+ * @param offset Driver-dependent offset.  For a random-access device, this is
+ *        often a byte offset from the beginning of the device; in other cases,
+ *        like on a control device, it may have a different meaning.
+ * @param intarg Value which will be delivered as the intarg member of the
+ *        eventual interrupt message; the intdata member will be set to the
+ *        normal return value from the read request.
+ * @return Zero if the read was successfully scheduled; otherwise, a negative
+ *         error code.  Note that some drivers may choose to pre-validate
+ *         their arguments, and may thus detect certain device error
+ *         conditions at this time rather than when the completion notification
+ *         occurs, but this is not required.
+ */
+int hv_dev_preada(int devhdl, __hv32 flags, __hv32 sgl_len,
+                  HV_SGL sgl[/* sgl_len */], __hv64 offset, HV_IntArg intarg);
+
+
+/** Write data to a hypervisor device asynchronously.
+ *
+ *  This service transfers data from a memory buffer to a hypervisor
+ *  device.  When the service returns, the write has been scheduled.
+ *  When the write completes, an interrupt message will be delivered,
+ *  and the buffer may be overwritten by the client; the data may not
+ *  necessarily have been conveyed to the actual hardware I/O interface.
+ *
+ *  The number of possible outstanding asynchronous requests is defined by
+ *  each driver, but it is recommended that it be at least two requests
+ *  per tile per device.
+ *
+ *  No ordering is guaranteed between synchronous and asynchronous requests,
+ *  even those issued on the same tile.
+ *
+ *  The completion interrupt message could come directly, or via the downcall
+ *  (intctrl1) method, depending on what the tile is doing when the read
+ *  completes.  Interrupts do not coalesce; one is delivered for each
+ *  asynchronous I/O request.  Note that it is possible for the requested
+ *  interrupt to be delivered after this service is called but before it
+ *  returns.
+ *
+ *  Devices may choose to support both the synchronous and asynchronous write
+ *  operations, only one of them, or neither of them.
+ *
+ * @param devhdl Device handle of the device to be read from.
+ * @param flags Flags (HV_DEV_xxx).
+ * @param sgl_len Number of elements in the scatter-gather list.
+ * @param sgl Scatter-gather list describing the memory from which data will be
+ *        read.
+ * @param offset Driver-dependent offset.  For a random-access device, this is
+ *        often a byte offset from the beginning of the device; in other cases,
+ *        like on a control device, it may have a different meaning.
+ * @param intarg Value which will be delivered as the intarg member of the
+ *        eventual interrupt message; the intdata member will be set to the
+ *        normal return value from the write request.
+ * @return Zero if the write was successfully scheduled; otherwise, a negative
+ *         error code.  Note that some drivers may choose to pre-validate
+ *         their arguments, and may thus detect certain device error
+ *         conditions at this time rather than when the completion notification
+ *         occurs, but this is not required.
+ */
+int hv_dev_pwritea(int devhdl, __hv32 flags, __hv32 sgl_len,
+                   HV_SGL sgl[/* sgl_len */], __hv64 offset, HV_IntArg intarg);
+
+
+/** Define a pair of tile and ASID to identify a user process context. */
+typedef struct
+{
+  /** X coordinate, relative to supervisor's top-left coordinate */
+  unsigned int x:11;
+
+  /** Y coordinate, relative to supervisor's top-left coordinate */
+  unsigned int y:11;
+
+  /** ASID of the process on this x,y tile */
+  HV_ASID asid:10;
+} HV_Remote_ASID;
+
+/** Flush cache and/or TLB state on remote tiles.
+ *
+ * @param cache_pa Client physical address to flush from cache (ignored if
+ *        the length encoded in cache_control is zero, or if
+ *        HV_FLUSH_EVICT_L2 is set, or if cache_cpumask is NULL).
+ * @param cache_control This argument allows you to specify a length of
+ *        physical address space to flush (maximum HV_FLUSH_MAX_CACHE_LEN).
+ *        You can "or" in HV_FLUSH_EVICT_L2 to flush the whole L2 cache.
+ *        You can "or" in HV_FLUSH_EVICT_LI1 to flush the whole LII cache.
+ *        HV_FLUSH_ALL flushes all caches.
+ * @param cache_cpumask Bitmask (in row-major order, supervisor-relative) of
+ *        tile indices to perform cache flush on.  The low bit of the first
+ *        word corresponds to the tile at the upper left-hand corner of the
+ *        supervisor's rectangle.  If passed as a NULL pointer, equivalent
+ *        to an empty bitmask.  On chips which support hash-for-home caching,
+ *        if passed as -1, equivalent to a mask containing tiles which could
+ *        be doing hash-for-home caching.
+ * @param tlb_va Virtual address to flush from TLB (ignored if
+ *        tlb_length is zero or tlb_cpumask is NULL).
+ * @param tlb_length Number of bytes of data to flush from the TLB.
+ * @param tlb_pgsize Page size to use for TLB flushes.
+ *        tlb_va and tlb_length need not be aligned to this size.
+ * @param tlb_cpumask Bitmask for tlb flush, like cache_cpumask.
+ *        If passed as a NULL pointer, equivalent to an empty bitmask.
+ * @param asids Pointer to an HV_Remote_ASID array of tile/ASID pairs to flush.
+ * @param asidcount Number of HV_Remote_ASID entries in asids[].
+ * @return Zero for success, or else HV_EINVAL or HV_EFAULT for errors that
+ *        are detected while parsing the arguments.
+ */
+int hv_flush_remote(HV_PhysAddr cache_pa, unsigned long cache_control,
+                    unsigned long* cache_cpumask,
+                    HV_VirtAddr tlb_va, unsigned long tlb_length,
+                    unsigned long tlb_pgsize, unsigned long* tlb_cpumask,
+                    HV_Remote_ASID* asids, int asidcount);
+
+/** Include in cache_control to ensure a flush of the entire L2. */
+#define HV_FLUSH_EVICT_L2 (1UL << 31)
+
+/** Include in cache_control to ensure a flush of the entire L1I. */
+#define HV_FLUSH_EVICT_L1I (1UL << 30)
+
+/** Maximum legal size to use for the "length" component of cache_control. */
+#define HV_FLUSH_MAX_CACHE_LEN ((1UL << 30) - 1)
+
+/** Use for cache_control to ensure a flush of all caches. */
+#define HV_FLUSH_ALL -1UL
+
+#else   /* __ASSEMBLER__ */
+
+/** Include in cache_control to ensure a flush of the entire L2. */
+#define HV_FLUSH_EVICT_L2 (1 << 31)
+
+/** Include in cache_control to ensure a flush of the entire L1I. */
+#define HV_FLUSH_EVICT_L1I (1 << 30)
+
+/** Maximum legal size to use for the "length" component of cache_control. */
+#define HV_FLUSH_MAX_CACHE_LEN ((1 << 30) - 1)
+
+/** Use for cache_control to ensure a flush of all caches. */
+#define HV_FLUSH_ALL -1
+
+#endif  /* __ASSEMBLER__ */
+
+#ifndef __ASSEMBLER__
+
+/** Return a 64-bit value corresponding to the PTE if needed */
+#define hv_pte_val(pte) ((pte).val)
+
+/** Cast a 64-bit value to an HV_PTE */
+#define hv_pte(val) ((HV_PTE) { val })
+
+#endif  /* !__ASSEMBLER__ */
+
+
+/** Bits in the size of an HV_PTE */
+#define HV_LOG2_PTE_SIZE 3
+
+/** Size of an HV_PTE */
+#define HV_PTE_SIZE (1 << HV_LOG2_PTE_SIZE)
+
+
+/* Bits in HV_PTE's low word. */
+#define HV_PTE_INDEX_PRESENT          0  /**< PTE is valid */
+#define HV_PTE_INDEX_MIGRATING        1  /**< Page is migrating */
+#define HV_PTE_INDEX_CLIENT0          2  /**< Page client state 0 */
+#define HV_PTE_INDEX_CLIENT1          3  /**< Page client state 1 */
+#define HV_PTE_INDEX_NC               4  /**< L1$/L2$ incoherent with L3$ */
+#define HV_PTE_INDEX_NO_ALLOC_L1      5  /**< Page is uncached in local L1$ */
+#define HV_PTE_INDEX_NO_ALLOC_L2      6  /**< Page is uncached in local L2$ */
+#define HV_PTE_INDEX_CACHED_PRIORITY  7  /**< Page is priority cached */
+#define HV_PTE_INDEX_PAGE             8  /**< PTE describes a page */
+#define HV_PTE_INDEX_GLOBAL           9  /**< Page is global */
+#define HV_PTE_INDEX_USER            10  /**< Page is user-accessible */
+#define HV_PTE_INDEX_ACCESSED        11  /**< Page has been accessed */
+#define HV_PTE_INDEX_DIRTY           12  /**< Page has been written */
+                                         /*   Bits 13-15 are reserved for
+                                              future use. */
+#define HV_PTE_INDEX_MODE            16  /**< Page mode; see HV_PTE_MODE_xxx */
+#define HV_PTE_MODE_BITS              3  /**< Number of bits in mode */
+                                         /*   Bit 19 is reserved for
+                                              future use. */
+#define HV_PTE_INDEX_LOTAR           20  /**< Page's LOTAR; must be high bits
+                                              of word */
+#define HV_PTE_LOTAR_BITS            12  /**< Number of bits in a LOTAR */
+
+/* Bits in HV_PTE's high word. */
+#define HV_PTE_INDEX_READABLE        32  /**< Page is readable */
+#define HV_PTE_INDEX_WRITABLE        33  /**< Page is writable */
+#define HV_PTE_INDEX_EXECUTABLE      34  /**< Page is executable */
+#define HV_PTE_INDEX_PTFN            35  /**< Page's PTFN; must be high bits
+                                              of word */
+#define HV_PTE_PTFN_BITS             29  /**< Number of bits in a PTFN */
+
+/** Position of the PFN field within the PTE (subset of the PTFN). */
+#define HV_PTE_INDEX_PFN (HV_PTE_INDEX_PTFN + (HV_LOG2_PAGE_SIZE_SMALL - \
+                                               HV_LOG2_PAGE_TABLE_ALIGN))
+
+/** Length of the PFN field within the PTE (subset of the PTFN). */
+#define HV_PTE_INDEX_PFN_BITS (HV_PTE_INDEX_PTFN_BITS - \
+                               (HV_LOG2_PAGE_SIZE_SMALL - \
+                                HV_LOG2_PAGE_TABLE_ALIGN))
+
+/*
+ * Legal values for the PTE's mode field
+ */
+/** Data is not resident in any caches; loads and stores access memory
+ *  directly.
+ */
+#define HV_PTE_MODE_UNCACHED          1
+
+/** Data is resident in the tile's local L1 and/or L2 caches; if a load
+ *  or store misses there, it goes to memory.
+ *
+ *  The copy in the local L1$/L2$ is not invalidated when the copy in
+ *  memory is changed.
+ */
+#define HV_PTE_MODE_CACHE_NO_L3       2
+
+/** Data is resident in the tile's local L1 and/or L2 caches.  If a load
+ *  or store misses there, it goes to an L3 cache in a designated tile;
+ *  if it misses there, it goes to memory.
+ *
+ *  If the NC bit is not set, the copy in the local L1$/L2$ is invalidated
+ *  when the copy in the remote L3$ is changed.  Otherwise, such
+ *  invalidation will not occur.
+ *
+ *  Chips for which CHIP_HAS_COHERENT_LOCAL_CACHE() is 0 do not support
+ *  invalidation from an L3$ to another tile's L1$/L2$.  If the NC bit is
+ *  clear on such a chip, no copy is kept in the local L1$/L2$ in this mode.
+ */
+#define HV_PTE_MODE_CACHE_TILE_L3     3
+
+/** Data is resident in the tile's local L1 and/or L2 caches.  If a load
+ *  or store misses there, it goes to an L3 cache in one of a set of
+ *  designated tiles; if it misses there, it goes to memory.  Which tile
+ *  is chosen from the set depends upon a hash function applied to the
+ *  physical address.  This mode is not supported on chips for which
+ *  CHIP_HAS_CBOX_HOME_MAP() is 0.
+ *
+ *  If the NC bit is not set, the copy in the local L1$/L2$ is invalidated
+ *  when the copy in the remote L3$ is changed.  Otherwise, such
+ *  invalidation will not occur.
+ *
+ *  Chips for which CHIP_HAS_COHERENT_LOCAL_CACHE() is 0 do not support
+ *  invalidation from an L3$ to another tile's L1$/L2$.  If the NC bit is
+ *  clear on such a chip, no copy is kept in the local L1$/L2$ in this mode.
+ */
+#define HV_PTE_MODE_CACHE_HASH_L3     4
+
+/** Data is not resident in memory; accesses are instead made to an I/O
+ *  device, whose tile coordinates are given by the PTE's LOTAR field.
+ *  This mode is only supported on chips for which CHIP_HAS_MMIO() is 1.
+ *  The EXECUTABLE bit may not be set in an MMIO PTE.
+ */
+#define HV_PTE_MODE_MMIO              5
+
+
+/* C wants 1ULL so it is typed as __hv64, but the assembler needs just numbers.
+ * The assembler can't handle shifts greater than 31, but treats them
+ * as shifts mod 32, so assembler code must be aware of which word
+ * the bit belongs in when using these macros.
+ */
+#ifdef __ASSEMBLER__
+#define __HV_PTE_ONE 1        /**< One, for assembler */
+#else
+#define __HV_PTE_ONE 1ULL     /**< One, for C */
+#endif
+
+/** Is this PTE present?
+ *
+ * If this bit is set, this PTE represents a valid translation or level-2
+ * page table pointer.  Otherwise, the page table does not contain a
+ * translation for the subject virtual pages.
+ *
+ * If this bit is not set, the other bits in the PTE are not
+ * interpreted by the hypervisor, and may contain any value.
+ */
+#define HV_PTE_PRESENT               (__HV_PTE_ONE << HV_PTE_INDEX_PRESENT)
+
+/** Does this PTE map a page?
+ *
+ * If this bit is set in the level-1 page table, the entry should be
+ * interpreted as a level-2 page table entry mapping a large page.
+ *
+ * This bit should not be modified by the client while PRESENT is set, as
+ * doing so may race with the hypervisor's update of ACCESSED and DIRTY bits.
+ *
+ * In a level-2 page table, this bit is ignored and must be zero.
+ */
+#define HV_PTE_PAGE                  (__HV_PTE_ONE << HV_PTE_INDEX_PAGE)
+
+/** Is this a global (non-ASID) mapping?
+ *
+ * If this bit is set, the translations established by this PTE will
+ * not be flushed from the TLB by the hv_flush_asid() service; they
+ * will be flushed by the hv_flush_page() or hv_flush_pages() services.
+ *
+ * Setting this bit for translations which are identical in all page
+ * tables (for instance, code and data belonging to a client OS) can
+ * be very beneficial, as it will reduce the number of TLB misses.
+ * Note that, while it is not an error which will be detected by the
+ * hypervisor, it is an extremely bad idea to set this bit for
+ * translations which are _not_ identical in all page tables.
+ *
+ * This bit should not be modified by the client while PRESENT is set, as
+ * doing so may race with the hypervisor's update of ACCESSED and DIRTY bits.
+ *
+ * This bit is ignored in level-1 PTEs unless the Page bit is set.
+ */
+#define HV_PTE_GLOBAL                (__HV_PTE_ONE << HV_PTE_INDEX_GLOBAL)
+
+/** Is this mapping accessible to users?
+ *
+ * If this bit is set, code running at any PL will be permitted to
+ * access the virtual addresses mapped by this PTE.  Otherwise, only
+ * code running at PL 1 or above will be allowed to do so.
+ *
+ * This bit should not be modified by the client while PRESENT is set, as
+ * doing so may race with the hypervisor's update of ACCESSED and DIRTY bits.
+ *
+ * This bit is ignored in level-1 PTEs unless the Page bit is set.
+ */
+#define HV_PTE_USER                  (__HV_PTE_ONE << HV_PTE_INDEX_USER)
+
+/** Has this mapping been accessed?
+ *
+ * This bit is set by the hypervisor when the memory described by the
+ * translation is accessed for the first time.  It is never cleared by
+ * the hypervisor, but may be cleared by the client.  After the bit
+ * has been cleared, subsequent references are not guaranteed to set
+ * it again until the translation has been flushed from the TLB.
+ *
+ * This bit is ignored in level-1 PTEs unless the Page bit is set.
+ */
+#define HV_PTE_ACCESSED              (__HV_PTE_ONE << HV_PTE_INDEX_ACCESSED)
+
+/** Is this mapping dirty?
+ *
+ * This bit is set by the hypervisor when the memory described by the
+ * translation is written for the first time.  It is never cleared by
+ * the hypervisor, but may be cleared by the client.  After the bit
+ * has been cleared, subsequent references are not guaranteed to set
+ * it again until the translation has been flushed from the TLB.
+ *
+ * This bit is ignored in level-1 PTEs unless the Page bit is set.
+ */
+#define HV_PTE_DIRTY                 (__HV_PTE_ONE << HV_PTE_INDEX_DIRTY)
+
+/** Migrating bit in PTE.
+ *
+ * This bit is guaranteed not to be inspected or modified by the
+ * hypervisor.  The name is indicative of the suggested use by the client
+ * to tag pages whose L3 cache is being migrated from one cpu to another.
+ */
+#define HV_PTE_MIGRATING             (__HV_PTE_ONE << HV_PTE_INDEX_MIGRATING)
+
+/** Client-private bit in PTE.
+ *
+ * This bit is guaranteed not to be inspected or modified by the
+ * hypervisor.
+ */
+#define HV_PTE_CLIENT0               (__HV_PTE_ONE << HV_PTE_INDEX_CLIENT0)
+
+/** Client-private bit in PTE.
+ *
+ * This bit is guaranteed not to be inspected or modified by the
+ * hypervisor.
+ */
+#define HV_PTE_CLIENT1               (__HV_PTE_ONE << HV_PTE_INDEX_CLIENT1)
+
+/** Non-coherent (NC) bit in PTE.
+ *
+ * If this bit is set, the mapping that is set up will be non-coherent
+ * (also known as non-inclusive).  This means that changes to the L3
+ * cache will not cause a local copy to be invalidated.  It is generally
+ * recommended only for read-only mappings.
+ *
+ * In level-1 PTEs, if the Page bit is clear, this bit determines how the
+ * level-2 page table is accessed.
+ */
+#define HV_PTE_NC                    (__HV_PTE_ONE << HV_PTE_INDEX_NC)
+
+/** Is this page prevented from filling the L1$?
+ *
+ * If this bit is set, the page described by the PTE will not be cached
+ * the local cpu's L1 cache.
+ *
+ * If CHIP_HAS_NC_AND_NOALLOC_BITS() is not true in <chip.h> for this chip,
+ * it is illegal to use this attribute, and may cause client termination.
+ *
+ * In level-1 PTEs, if the Page bit is clear, this bit
+ * determines how the level-2 page table is accessed.
+ */
+#define HV_PTE_NO_ALLOC_L1           (__HV_PTE_ONE << HV_PTE_INDEX_NO_ALLOC_L1)
+
+/** Is this page prevented from filling the L2$?
+ *
+ * If this bit is set, the page described by the PTE will not be cached
+ * the local cpu's L2 cache.
+ *
+ * If CHIP_HAS_NC_AND_NOALLOC_BITS() is not true in <chip.h> for this chip,
+ * it is illegal to use this attribute, and may cause client termination.
+ *
+ * In level-1 PTEs, if the Page bit is clear, this bit determines how the
+ * level-2 page table is accessed.
+ */
+#define HV_PTE_NO_ALLOC_L2           (__HV_PTE_ONE << HV_PTE_INDEX_NO_ALLOC_L2)
+
+/** Is this a priority page?
+ *
+ * If this bit is set, the page described by the PTE will be given
+ * priority in the cache.  Normally this translates into allowing the
+ * page to use only the "red" half of the cache.  The client may wish to
+ * then use the hv_set_caching service to specify that other pages which
+ * alias this page will use only the "black" half of the cache.
+ *
+ * If the Cached Priority bit is clear, the hypervisor uses the
+ * current hv_set_caching() value to choose how to cache the page.
+ *
+ * It is illegal to set the Cached Priority bit if the Non-Cached bit
+ * is set and the Cached Remotely bit is clear, i.e. if requests to
+ * the page map directly to memory.
+ *
+ * This bit is ignored in level-1 PTEs unless the Page bit is set.
+ */
+#define HV_PTE_CACHED_PRIORITY       (__HV_PTE_ONE << \
+                                      HV_PTE_INDEX_CACHED_PRIORITY)
+
+/** Is this a readable mapping?
+ *
+ * If this bit is set, code will be permitted to read from (e.g.,
+ * issue load instructions against) the virtual addresses mapped by
+ * this PTE.
+ *
+ * It is illegal for this bit to be clear if the Writable bit is set.
+ *
+ * This bit is ignored in level-1 PTEs unless the Page bit is set.
+ */
+#define HV_PTE_READABLE              (__HV_PTE_ONE << HV_PTE_INDEX_READABLE)
+
+/** Is this a writable mapping?
+ *
+ * If this bit is set, code will be permitted to write to (e.g., issue
+ * store instructions against) the virtual addresses mapped by this
+ * PTE.
+ *
+ * This bit is ignored in level-1 PTEs unless the Page bit is set.
+ */
+#define HV_PTE_WRITABLE              (__HV_PTE_ONE << HV_PTE_INDEX_WRITABLE)
+
+/** Is this an executable mapping?
+ *
+ * If this bit is set, code will be permitted to execute from
+ * (e.g., jump to) the virtual addresses mapped by this PTE.
+ *
+ * This bit applies to any processor on the tile, if there are more
+ * than one.
+ *
+ * This bit is ignored in level-1 PTEs unless the Page bit is set.
+ */
+#define HV_PTE_EXECUTABLE            (__HV_PTE_ONE << HV_PTE_INDEX_EXECUTABLE)
+
+/** The width of a LOTAR's x or y bitfield. */
+#define HV_LOTAR_WIDTH 11
+
+/** Converts an x,y pair to a LOTAR value. */
+#define HV_XY_TO_LOTAR(x, y) ((HV_LOTAR)(((x) << HV_LOTAR_WIDTH) | (y)))
+
+/** Extracts the X component of a lotar. */
+#define HV_LOTAR_X(lotar) ((lotar) >> HV_LOTAR_WIDTH)
+
+/** Extracts the Y component of a lotar. */
+#define HV_LOTAR_Y(lotar) ((lotar) & ((1 << HV_LOTAR_WIDTH) - 1))
+
+#ifndef __ASSEMBLER__
+
+/** Define accessor functions for a PTE bit. */
+#define _HV_BIT(name, bit)                                      \
+static __inline int                                             \
+hv_pte_get_##name(HV_PTE pte)                                   \
+{                                                               \
+  return (pte.val >> HV_PTE_INDEX_##bit) & 1;                   \
+}                                                               \
+                                                                \
+static __inline HV_PTE                                          \
+hv_pte_set_##name(HV_PTE pte)                                   \
+{                                                               \
+  pte.val |= 1ULL << HV_PTE_INDEX_##bit;                        \
+  return pte;                                                   \
+}                                                               \
+                                                                \
+static __inline HV_PTE                                          \
+hv_pte_clear_##name(HV_PTE pte)                                 \
+{                                                               \
+  pte.val &= ~(1ULL << HV_PTE_INDEX_##bit);                     \
+  return pte;                                                   \
+}
+
+/* Generate accessors to get, set, and clear various PTE flags.
+ */
+_HV_BIT(present,         PRESENT)
+_HV_BIT(page,            PAGE)
+_HV_BIT(client0,         CLIENT0)
+_HV_BIT(client1,         CLIENT1)
+_HV_BIT(migrating,       MIGRATING)
+_HV_BIT(nc,              NC)
+_HV_BIT(readable,        READABLE)
+_HV_BIT(writable,        WRITABLE)
+_HV_BIT(executable,      EXECUTABLE)
+_HV_BIT(accessed,        ACCESSED)
+_HV_BIT(dirty,           DIRTY)
+_HV_BIT(no_alloc_l1,     NO_ALLOC_L1)
+_HV_BIT(no_alloc_l2,     NO_ALLOC_L2)
+_HV_BIT(cached_priority, CACHED_PRIORITY)
+_HV_BIT(global,          GLOBAL)
+_HV_BIT(user,            USER)
+
+#undef _HV_BIT
+
+/** Get the page mode from the PTE.
+ *
+ * This field generally determines whether and how accesses to the page
+ * are cached; the HV_PTE_MODE_xxx symbols define the legal values for the
+ * page mode.  The NC, NO_ALLOC_L1, and NO_ALLOC_L2 bits modify this
+ * general policy.
+ */
+static __inline unsigned int
+hv_pte_get_mode(const HV_PTE pte)
+{
+  return (((__hv32) pte.val) >> HV_PTE_INDEX_MODE) &
+         ((1 << HV_PTE_MODE_BITS) - 1);
+}
+
+/** Set the page mode into a PTE.  See hv_pte_get_mode. */
+static __inline HV_PTE
+hv_pte_set_mode(HV_PTE pte, unsigned int val)
+{
+  pte.val &= ~(((1ULL << HV_PTE_MODE_BITS) - 1) << HV_PTE_INDEX_MODE);
+  pte.val |= val << HV_PTE_INDEX_MODE;
+  return pte;
+}
+
+/** Get the page frame number from the PTE.
+ *
+ * This field contains the upper bits of the CPA (client physical
+ * address) of the target page; the complete CPA is this field with
+ * HV_LOG2_PAGE_SIZE_SMALL zero bits appended to it.
+ *
+ * For PTEs in a level-1 page table where the Page bit is set, the
+ * CPA must be aligned modulo the large page size.
+ */
+static __inline unsigned int
+hv_pte_get_pfn(const HV_PTE pte)
+{
+  return pte.val >> HV_PTE_INDEX_PFN;
+}
+
+
+/** Set the page frame number into a PTE.  See hv_pte_get_pfn. */
+static __inline HV_PTE
+hv_pte_set_pfn(HV_PTE pte, unsigned int val)
+{
+  /*
+   * Note that the use of "PTFN" in the next line is intentional; we
+   * don't want any garbage lower bits left in that field.
+   */
+  pte.val &= ~(((1ULL << HV_PTE_PTFN_BITS) - 1) << HV_PTE_INDEX_PTFN);
+  pte.val |= (__hv64) val << HV_PTE_INDEX_PFN;
+  return pte;
+}
+
+/** Get the page table frame number from the PTE.
+ *
+ * This field contains the upper bits of the CPA (client physical
+ * address) of the target page table; the complete CPA is this field with
+ * with HV_PAGE_TABLE_ALIGN zero bits appended to it.
+ *
+ * For PTEs in a level-1 page table when the Page bit is not set, the
+ * CPA must be aligned modulo the sticter of HV_PAGE_TABLE_ALIGN and
+ * the level-2 page table size.
+ */
+static __inline unsigned long
+hv_pte_get_ptfn(const HV_PTE pte)
+{
+  return pte.val >> HV_PTE_INDEX_PTFN;
+}
+
+
+/** Set the page table frame number into a PTE.  See hv_pte_get_ptfn. */
+static __inline HV_PTE
+hv_pte_set_ptfn(HV_PTE pte, unsigned long val)
+{
+  pte.val &= ~(((1ULL << HV_PTE_PTFN_BITS)-1) << HV_PTE_INDEX_PTFN);
+  pte.val |= (__hv64) val << HV_PTE_INDEX_PTFN;
+  return pte;
+}
+
+
+/** Get the remote tile caching this page.
+ *
+ * Specifies the remote tile which is providing the L3 cache for this page.
+ *
+ * This field is ignored unless the page mode is HV_PTE_MODE_CACHE_TILE_L3.
+ *
+ * In level-1 PTEs, if the Page bit is clear, this field determines how the
+ * level-2 page table is accessed.
+ */
+static __inline unsigned int
+hv_pte_get_lotar(const HV_PTE pte)
+{
+  unsigned int lotar = ((__hv32) pte.val) >> HV_PTE_INDEX_LOTAR;
+
+  return HV_XY_TO_LOTAR( (lotar >> (HV_PTE_LOTAR_BITS / 2)),
+                         (lotar & ((1 << (HV_PTE_LOTAR_BITS / 2)) - 1)) );
+}
+
+
+/** Set the remote tile caching a page into a PTE.  See hv_pte_get_lotar. */
+static __inline HV_PTE
+hv_pte_set_lotar(HV_PTE pte, unsigned int val)
+{
+  unsigned int x = HV_LOTAR_X(val);
+  unsigned int y = HV_LOTAR_Y(val);
+
+  pte.val &= ~(((1ULL << HV_PTE_LOTAR_BITS)-1) << HV_PTE_INDEX_LOTAR);
+  pte.val |= (x << (HV_PTE_INDEX_LOTAR + HV_PTE_LOTAR_BITS / 2)) |
+             (y << HV_PTE_INDEX_LOTAR);
+  return pte;
+}
+
+#endif  /* !__ASSEMBLER__ */
+
+/** Converts a client physical address to a pfn. */
+#define HV_CPA_TO_PFN(p) ((p) >> HV_LOG2_PAGE_SIZE_SMALL)
+
+/** Converts a pfn to a client physical address. */
+#define HV_PFN_TO_CPA(p) (((HV_PhysAddr)(p)) << HV_LOG2_PAGE_SIZE_SMALL)
+
+/** Converts a client physical address to a ptfn. */
+#define HV_CPA_TO_PTFN(p) ((p) >> HV_LOG2_PAGE_TABLE_ALIGN)
+
+/** Converts a ptfn to a client physical address. */
+#define HV_PTFN_TO_CPA(p) (((HV_PhysAddr)(p)) << HV_LOG2_PAGE_TABLE_ALIGN)
+
+/** Converts a ptfn to a pfn. */
+#define HV_PTFN_TO_PFN(p) \
+  ((p) >> (HV_LOG2_PAGE_SIZE_SMALL - HV_LOG2_PAGE_TABLE_ALIGN))
+
+/** Converts a pfn to a ptfn. */
+#define HV_PFN_TO_PTFN(p) \
+  ((p) << (HV_LOG2_PAGE_SIZE_SMALL - HV_LOG2_PAGE_TABLE_ALIGN))
+
+#if CHIP_VA_WIDTH() > 32
+
+/** Log number of HV_PTE entries in L0 page table */
+#define HV_LOG2_L0_ENTRIES (CHIP_VA_WIDTH() - HV_LOG2_L1_SPAN)
+
+/** Number of HV_PTE entries in L0 page table */
+#define HV_L0_ENTRIES (1 << HV_LOG2_L0_ENTRIES)
+
+/** Log size of L0 page table in bytes */
+#define HV_LOG2_L0_SIZE (HV_LOG2_PTE_SIZE + HV_LOG2_L0_ENTRIES)
+
+/** Size of L0 page table in bytes */
+#define HV_L0_SIZE (1 << HV_LOG2_L0_SIZE)
+
+#ifdef __ASSEMBLER__
+
+/** Index in L0 for a specific VA */
+#define HV_L0_INDEX(va) \
+  (((va) >> HV_LOG2_L1_SPAN) & (HV_L0_ENTRIES - 1))
+
+#else
+
+/** Index in L1 for a specific VA */
+#define HV_L0_INDEX(va) \
+  (((HV_VirtAddr)(va) >> HV_LOG2_L1_SPAN) & (HV_L0_ENTRIES - 1))
+
+#endif
+
+#endif /* CHIP_VA_WIDTH() > 32 */
+
+/** Log number of HV_PTE entries in L1 page table */
+#define HV_LOG2_L1_ENTRIES (HV_LOG2_L1_SPAN - HV_LOG2_PAGE_SIZE_LARGE)
+
+/** Number of HV_PTE entries in L1 page table */
+#define HV_L1_ENTRIES (1 << HV_LOG2_L1_ENTRIES)
+
+/** Log size of L1 page table in bytes */
+#define HV_LOG2_L1_SIZE (HV_LOG2_PTE_SIZE + HV_LOG2_L1_ENTRIES)
+
+/** Size of L1 page table in bytes */
+#define HV_L1_SIZE (1 << HV_LOG2_L1_SIZE)
+
+/** Log number of HV_PTE entries in level-2 page table */
+#define HV_LOG2_L2_ENTRIES (HV_LOG2_PAGE_SIZE_LARGE - HV_LOG2_PAGE_SIZE_SMALL)
+
+/** Number of HV_PTE entries in level-2 page table */
+#define HV_L2_ENTRIES (1 << HV_LOG2_L2_ENTRIES)
+
+/** Log size of level-2 page table in bytes */
+#define HV_LOG2_L2_SIZE (HV_LOG2_PTE_SIZE + HV_LOG2_L2_ENTRIES)
+
+/** Size of level-2 page table in bytes */
+#define HV_L2_SIZE (1 << HV_LOG2_L2_SIZE)
+
+#ifdef __ASSEMBLER__
+
+#if CHIP_VA_WIDTH() > 32
+
+/** Index in L1 for a specific VA */
+#define HV_L1_INDEX(va) \
+  (((va) >> HV_LOG2_PAGE_SIZE_LARGE) & (HV_L1_ENTRIES - 1))
+
+#else /* CHIP_VA_WIDTH() > 32 */
+
+/** Index in L1 for a specific VA */
+#define HV_L1_INDEX(va) \
+  (((va) >> HV_LOG2_PAGE_SIZE_LARGE))
+
+#endif /* CHIP_VA_WIDTH() > 32 */
+
+/** Index in level-2 page table for a specific VA */
+#define HV_L2_INDEX(va) \
+  (((va) >> HV_LOG2_PAGE_SIZE_SMALL) & (HV_L2_ENTRIES - 1))
+
+#else /* __ASSEMBLER __ */
+
+#if CHIP_VA_WIDTH() > 32
+
+/** Index in L1 for a specific VA */
+#define HV_L1_INDEX(va) \
+  (((HV_VirtAddr)(va) >> HV_LOG2_PAGE_SIZE_LARGE) & (HV_L1_ENTRIES - 1))
+
+#else /* CHIP_VA_WIDTH() > 32 */
+
+/** Index in L1 for a specific VA */
+#define HV_L1_INDEX(va) \
+  (((HV_VirtAddr)(va) >> HV_LOG2_PAGE_SIZE_LARGE))
+
+#endif /* CHIP_VA_WIDTH() > 32 */
+
+/** Index in level-2 page table for a specific VA */
+#define HV_L2_INDEX(va) \
+  (((HV_VirtAddr)(va) >> HV_LOG2_PAGE_SIZE_SMALL) & (HV_L2_ENTRIES - 1))
+
+#endif /* __ASSEMBLER __ */
+
+#endif /* _TILE_HV_H */
diff --git a/arch/tile/include/hv/syscall_public.h b/arch/tile/include/hv/syscall_public.h
new file mode 100644
index 0000000..9cc0837
--- /dev/null
+++ b/arch/tile/include/hv/syscall_public.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+/**
+ * @file syscall.h
+ * Indices for the hypervisor system calls that are intended to be called
+ * directly, rather than only through hypervisor-generated "glue" code.
+ */
+
+#ifndef _SYS_HV_INCLUDE_SYSCALL_PUBLIC_H
+#define _SYS_HV_INCLUDE_SYSCALL_PUBLIC_H
+
+/** Fast syscall flag bit location.  When this bit is set, the hypervisor
+ *  handles the syscall specially.
+ */
+#define HV_SYS_FAST_SHIFT                 14
+
+/** Fast syscall flag bit mask. */
+#define HV_SYS_FAST_MASK                  (1 << HV_SYS_FAST_SHIFT)
+
+/** Bit location for flagging fast syscalls that can be called from PL0. */
+#define HV_SYS_FAST_PLO_SHIFT             13
+
+/** Fast syscall allowing PL0 bit mask. */
+#define HV_SYS_FAST_PL0_MASK              (1 << HV_SYS_FAST_PLO_SHIFT)
+
+/** Perform an MF that waits for all victims to reach DRAM. */
+#define HV_SYS_fence_incoherent         (51 | HV_SYS_FAST_MASK \
+                                       | HV_SYS_FAST_PL0_MASK)
+
+#endif /* !_SYS_HV_INCLUDE_SYSCALL_PUBLIC_H */
diff --git a/arch/tile/kernel/Makefile b/arch/tile/kernel/Makefile
new file mode 100644
index 0000000..756e6ec
--- /dev/null
+++ b/arch/tile/kernel/Makefile
@@ -0,0 +1,16 @@
+#
+# Makefile for the Linux/TILE kernel.
+#
+
+extra-y := vmlinux.lds head_$(BITS).o
+obj-y := backtrace.o entry.o init_task.o irq.o messaging.o \
+	pci-dma.o proc.o process.o ptrace.o reboot.o \
+	setup.o signal.o single_step.o stack.o sys.o time.o traps.o \
+	intvec_$(BITS).o regs_$(BITS).o tile-desc_$(BITS).o
+
+obj-$(CONFIG_TILEGX)		+= futex_64.o
+obj-$(CONFIG_COMPAT)		+= compat.o compat_signal.o
+obj-$(CONFIG_SMP)		+= smpboot.o smp.o tlb.o
+obj-$(CONFIG_MODULES)		+= module.o
+obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
+obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o
diff --git a/arch/tile/kernel/asm-offsets.c b/arch/tile/kernel/asm-offsets.c
new file mode 100644
index 0000000..01ddf19
--- /dev/null
+++ b/arch/tile/kernel/asm-offsets.c
@@ -0,0 +1,76 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ * Generates definitions from c-type structures used by assembly sources.
+ */
+
+#include <linux/kbuild.h>
+#include <linux/thread_info.h>
+#include <linux/sched.h>
+#include <linux/hardirq.h>
+#include <linux/ptrace.h>
+#include <hv/hypervisor.h>
+
+/* Check for compatible compiler early in the build. */
+#ifdef CONFIG_TILEGX
+# ifndef __tilegx__
+#  error Can only build TILE-Gx configurations with tilegx compiler
+# endif
+# ifndef __LP64__
+#  error Must not specify -m32 when building the TILE-Gx kernel
+# endif
+#else
+# ifdef __tilegx__
+#  error Can not build TILEPro/TILE64 configurations with tilegx compiler
+# endif
+#endif
+
+void foo(void)
+{
+	DEFINE(SINGLESTEP_STATE_BUFFER_OFFSET, \
+	       offsetof(struct single_step_state, buffer));
+	DEFINE(SINGLESTEP_STATE_FLAGS_OFFSET, \
+	       offsetof(struct single_step_state, flags));
+	DEFINE(SINGLESTEP_STATE_ORIG_PC_OFFSET, \
+	       offsetof(struct single_step_state, orig_pc));
+	DEFINE(SINGLESTEP_STATE_NEXT_PC_OFFSET, \
+	       offsetof(struct single_step_state, next_pc));
+	DEFINE(SINGLESTEP_STATE_BRANCH_NEXT_PC_OFFSET, \
+	       offsetof(struct single_step_state, branch_next_pc));
+	DEFINE(SINGLESTEP_STATE_UPDATE_VALUE_OFFSET, \
+	       offsetof(struct single_step_state, update_value));
+
+	DEFINE(THREAD_INFO_TASK_OFFSET, \
+	       offsetof(struct thread_info, task));
+	DEFINE(THREAD_INFO_FLAGS_OFFSET, \
+	       offsetof(struct thread_info, flags));
+	DEFINE(THREAD_INFO_STATUS_OFFSET, \
+	       offsetof(struct thread_info, status));
+	DEFINE(THREAD_INFO_HOMECACHE_CPU_OFFSET, \
+	       offsetof(struct thread_info, homecache_cpu));
+	DEFINE(THREAD_INFO_STEP_STATE_OFFSET, \
+	       offsetof(struct thread_info, step_state));
+
+	DEFINE(TASK_STRUCT_THREAD_KSP_OFFSET,
+	       offsetof(struct task_struct, thread.ksp));
+	DEFINE(TASK_STRUCT_THREAD_PC_OFFSET,
+	       offsetof(struct task_struct, thread.pc));
+
+	DEFINE(HV_TOPOLOGY_WIDTH_OFFSET, \
+	       offsetof(HV_Topology, width));
+	DEFINE(HV_TOPOLOGY_HEIGHT_OFFSET, \
+	       offsetof(HV_Topology, height));
+
+	DEFINE(IRQ_CPUSTAT_SYSCALL_COUNT_OFFSET, \
+	       offsetof(irq_cpustat_t, irq_syscall_count));
+}
diff --git a/arch/tile/kernel/backtrace.c b/arch/tile/kernel/backtrace.c
new file mode 100644
index 0000000..1b0a410
--- /dev/null
+++ b/arch/tile/kernel/backtrace.c
@@ -0,0 +1,634 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/string.h>
+
+#include <asm/backtrace.h>
+
+#include <arch/chip.h>
+
+#if TILE_CHIP < 10
+
+
+#include <asm/opcode-tile.h>
+
+
+#define TREG_SP 54
+#define TREG_LR 55
+
+
+/** A decoded bundle used for backtracer analysis. */
+typedef struct {
+	tile_bundle_bits bits;
+	int num_insns;
+	struct tile_decoded_instruction
+	insns[TILE_MAX_INSTRUCTIONS_PER_BUNDLE];
+} BacktraceBundle;
+
+
+/* This implementation only makes sense for native tools. */
+/** Default function to read memory. */
+static bool
+bt_read_memory(void *result, VirtualAddress addr, size_t size, void *extra)
+{
+	/* FIXME: this should do some horrible signal stuff to catch
+	 * SEGV cleanly and fail.
+	 *
+	 * Or else the caller should do the setjmp for efficiency.
+	 */
+
+	memcpy(result, (const void *)addr, size);
+	return true;
+}
+
+
+/** Locates an instruction inside the given bundle that
+ * has the specified mnemonic, and whose first 'num_operands_to_match'
+ * operands exactly match those in 'operand_values'.
+ */
+static const struct tile_decoded_instruction*
+find_matching_insn(const BacktraceBundle *bundle,
+		   tile_mnemonic mnemonic,
+		   const int *operand_values,
+		   int num_operands_to_match)
+{
+	int i, j;
+	bool match;
+
+	for (i = 0; i < bundle->num_insns; i++) {
+		const struct tile_decoded_instruction *insn =
+			&bundle->insns[i];
+
+		if (insn->opcode->mnemonic != mnemonic)
+			continue;
+
+		match = true;
+		for (j = 0; j < num_operands_to_match; j++) {
+			if (operand_values[j] != insn->operand_values[j]) {
+				match = false;
+				break;
+			}
+		}
+
+		if (match)
+			return insn;
+	}
+
+	return NULL;
+}
+
+/** Does this bundle contain an 'iret' instruction? */
+static inline bool
+bt_has_iret(const BacktraceBundle *bundle)
+{
+	return find_matching_insn(bundle, TILE_OPC_IRET, NULL, 0) != NULL;
+}
+
+/** Does this bundle contain an 'addi sp, sp, OFFSET' or
+ * 'addli sp, sp, OFFSET' instruction, and if so, what is OFFSET?
+ */
+static bool
+bt_has_addi_sp(const BacktraceBundle *bundle, int *adjust)
+{
+	static const int vals[2] = { TREG_SP, TREG_SP };
+
+	const struct tile_decoded_instruction *insn =
+		find_matching_insn(bundle, TILE_OPC_ADDI, vals, 2);
+	if (insn == NULL)
+		insn = find_matching_insn(bundle, TILE_OPC_ADDLI, vals, 2);
+	if (insn == NULL)
+		return false;
+
+	*adjust = insn->operand_values[2];
+	return true;
+}
+
+/** Does this bundle contain any 'info OP' or 'infol OP'
+ * instruction, and if so, what are their OP?  Note that OP is interpreted
+ * as an unsigned value by this code since that's what the caller wants.
+ * Returns the number of info ops found.
+ */
+static int
+bt_get_info_ops(const BacktraceBundle *bundle,
+		int operands[MAX_INFO_OPS_PER_BUNDLE])
+{
+	int num_ops = 0;
+	int i;
+
+	for (i = 0; i < bundle->num_insns; i++) {
+		const struct tile_decoded_instruction *insn =
+			&bundle->insns[i];
+
+		if (insn->opcode->mnemonic == TILE_OPC_INFO ||
+		    insn->opcode->mnemonic == TILE_OPC_INFOL) {
+			operands[num_ops++] = insn->operand_values[0];
+		}
+	}
+
+	return num_ops;
+}
+
+/** Does this bundle contain a jrp instruction, and if so, to which
+ * register is it jumping?
+ */
+static bool
+bt_has_jrp(const BacktraceBundle *bundle, int *target_reg)
+{
+	const struct tile_decoded_instruction *insn =
+		find_matching_insn(bundle, TILE_OPC_JRP, NULL, 0);
+	if (insn == NULL)
+		return false;
+
+	*target_reg = insn->operand_values[0];
+	return true;
+}
+
+/** Does this bundle modify the specified register in any way? */
+static bool
+bt_modifies_reg(const BacktraceBundle *bundle, int reg)
+{
+	int i, j;
+	for (i = 0; i < bundle->num_insns; i++) {
+		const struct tile_decoded_instruction *insn =
+			&bundle->insns[i];
+
+		if (insn->opcode->implicitly_written_register == reg)
+			return true;
+
+		for (j = 0; j < insn->opcode->num_operands; j++)
+			if (insn->operands[j]->is_dest_reg &&
+			    insn->operand_values[j] == reg)
+				return true;
+	}
+
+	return false;
+}
+
+/** Does this bundle modify sp? */
+static inline bool
+bt_modifies_sp(const BacktraceBundle *bundle)
+{
+	return bt_modifies_reg(bundle, TREG_SP);
+}
+
+/** Does this bundle modify lr? */
+static inline bool
+bt_modifies_lr(const BacktraceBundle *bundle)
+{
+	return bt_modifies_reg(bundle, TREG_LR);
+}
+
+/** Does this bundle contain the instruction 'move fp, sp'? */
+static inline bool
+bt_has_move_r52_sp(const BacktraceBundle *bundle)
+{
+	static const int vals[2] = { 52, TREG_SP };
+	return find_matching_insn(bundle, TILE_OPC_MOVE, vals, 2) != NULL;
+}
+
+/** Does this bundle contain the instruction 'sw sp, lr'? */
+static inline bool
+bt_has_sw_sp_lr(const BacktraceBundle *bundle)
+{
+	static const int vals[2] = { TREG_SP, TREG_LR };
+	return find_matching_insn(bundle, TILE_OPC_SW, vals, 2) != NULL;
+}
+
+/** Locates the caller's PC and SP for a program starting at the
+ * given address.
+ */
+static void
+find_caller_pc_and_caller_sp(CallerLocation *location,
+			     const VirtualAddress start_pc,
+			     BacktraceMemoryReader read_memory_func,
+			     void *read_memory_func_extra)
+{
+	/* Have we explicitly decided what the sp is,
+	 * rather than just the default?
+	 */
+	bool sp_determined = false;
+
+	/* Has any bundle seen so far modified lr? */
+	bool lr_modified = false;
+
+	/* Have we seen a move from sp to fp? */
+	bool sp_moved_to_r52 = false;
+
+	/* Have we seen a terminating bundle? */
+	bool seen_terminating_bundle = false;
+
+	/* Cut down on round-trip reading overhead by reading several
+	 * bundles at a time.
+	 */
+	tile_bundle_bits prefetched_bundles[32];
+	int num_bundles_prefetched = 0;
+	int next_bundle = 0;
+	VirtualAddress pc;
+
+	/* Default to assuming that the caller's sp is the current sp.
+	 * This is necessary to handle the case where we start backtracing
+	 * right at the end of the epilog.
+	 */
+	location->sp_location = SP_LOC_OFFSET;
+	location->sp_offset = 0;
+
+	/* Default to having no idea where the caller PC is. */
+	location->pc_location = PC_LOC_UNKNOWN;
+
+	/* Don't even try if the PC is not aligned. */
+	if (start_pc % TILE_BUNDLE_ALIGNMENT_IN_BYTES != 0)
+		return;
+
+	for (pc = start_pc;; pc += sizeof(tile_bundle_bits)) {
+
+		BacktraceBundle bundle;
+		int num_info_ops, info_operands[MAX_INFO_OPS_PER_BUNDLE];
+		int one_ago, jrp_reg;
+		bool has_jrp;
+
+		if (next_bundle >= num_bundles_prefetched) {
+			/* Prefetch some bytes, but don't cross a page
+			 * boundary since that might cause a read failure we
+			 * don't care about if we only need the first few
+			 * bytes. Note: we don't care what the actual page
+			 * size is; using the minimum possible page size will
+			 * prevent any problems.
+			 */
+			unsigned int bytes_to_prefetch = 4096 - (pc & 4095);
+			if (bytes_to_prefetch > sizeof prefetched_bundles)
+				bytes_to_prefetch = sizeof prefetched_bundles;
+
+			if (!read_memory_func(prefetched_bundles, pc,
+					      bytes_to_prefetch,
+					      read_memory_func_extra)) {
+				if (pc == start_pc) {
+					/* The program probably called a bad
+					 * address, such as a NULL pointer.
+					 * So treat this as if we are at the
+					 * start of the function prolog so the
+					 * backtrace will show how we got here.
+					 */
+					location->pc_location = PC_LOC_IN_LR;
+					return;
+				}
+
+				/* Unreadable address. Give up. */
+				break;
+			}
+
+			next_bundle = 0;
+			num_bundles_prefetched =
+				bytes_to_prefetch / sizeof(tile_bundle_bits);
+		}
+
+		/* Decode the next bundle. */
+		bundle.bits = prefetched_bundles[next_bundle++];
+		bundle.num_insns =
+			parse_insn_tile(bundle.bits, pc, bundle.insns);
+		num_info_ops = bt_get_info_ops(&bundle, info_operands);
+
+		/* First look at any one_ago info ops if they are interesting,
+		 * since they should shadow any non-one-ago info ops.
+		 */
+		for (one_ago = (pc != start_pc) ? 1 : 0;
+		     one_ago >= 0; one_ago--) {
+			int i;
+			for (i = 0; i < num_info_ops; i++) {
+				int info_operand = info_operands[i];
+				if (info_operand < CALLER_UNKNOWN_BASE)	{
+					/* Weird; reserved value, ignore it. */
+					continue;
+				}
+
+				/* Skip info ops which are not in the
+				 * "one_ago" mode we want right now.
+				 */
+				if (((info_operand & ONE_BUNDLE_AGO_FLAG) != 0)
+				    != (one_ago != 0))
+					continue;
+
+				/* Clear the flag to make later checking
+				 * easier. */
+				info_operand &= ~ONE_BUNDLE_AGO_FLAG;
+
+				/* Default to looking at PC_IN_LR_FLAG. */
+				if (info_operand & PC_IN_LR_FLAG)
+					location->pc_location =
+						PC_LOC_IN_LR;
+				else
+					location->pc_location =
+						PC_LOC_ON_STACK;
+
+				switch (info_operand) {
+				case CALLER_UNKNOWN_BASE:
+					location->pc_location = PC_LOC_UNKNOWN;
+					location->sp_location = SP_LOC_UNKNOWN;
+					return;
+
+				case CALLER_SP_IN_R52_BASE:
+				case CALLER_SP_IN_R52_BASE | PC_IN_LR_FLAG:
+					location->sp_location = SP_LOC_IN_R52;
+					return;
+
+				default:
+				{
+					const unsigned int val = info_operand
+						- CALLER_SP_OFFSET_BASE;
+					const unsigned int sp_offset =
+						(val >> NUM_INFO_OP_FLAGS) * 8;
+					if (sp_offset < 32768) {
+						/* This is a properly encoded
+						 * SP offset. */
+						location->sp_location =
+							SP_LOC_OFFSET;
+						location->sp_offset =
+							sp_offset;
+						return;
+					} else {
+						/* This looked like an SP
+						 * offset, but it's outside
+						 * the legal range, so this
+						 * must be an unrecognized
+						 * info operand.  Ignore it.
+						 */
+					}
+				}
+				break;
+				}
+			}
+		}
+
+		if (seen_terminating_bundle) {
+			/* We saw a terminating bundle during the previous
+			 * iteration, so we were only looking for an info op.
+			 */
+			break;
+		}
+
+		if (bundle.bits == 0) {
+			/* Wacky terminating bundle. Stop looping, and hope
+			 * we've already seen enough to find the caller.
+			 */
+			break;
+		}
+
+		/*
+		 * Try to determine caller's SP.
+		 */
+
+		if (!sp_determined) {
+			int adjust;
+			if (bt_has_addi_sp(&bundle, &adjust)) {
+				location->sp_location = SP_LOC_OFFSET;
+
+				if (adjust <= 0) {
+					/* We are in prolog about to adjust
+					 * SP. */
+					location->sp_offset = 0;
+				} else {
+					/* We are in epilog restoring SP. */
+					location->sp_offset = adjust;
+				}
+
+				sp_determined = true;
+			} else {
+				if (bt_has_move_r52_sp(&bundle)) {
+					/* Maybe in prolog, creating an
+					 * alloca-style frame.  But maybe in
+					 * the middle of a fixed-size frame
+					 * clobbering r52 with SP.
+					 */
+					sp_moved_to_r52 = true;
+				}
+
+				if (bt_modifies_sp(&bundle)) {
+					if (sp_moved_to_r52) {
+						/* We saw SP get saved into
+						 * r52 earlier (or now), which
+						 * must have been in the
+						 * prolog, so we now know that
+						 * SP is still holding the
+						 * caller's sp value.
+						 */
+						location->sp_location =
+							SP_LOC_OFFSET;
+						location->sp_offset = 0;
+					} else {
+						/* Someone must have saved
+						 * aside the caller's SP value
+						 * into r52, so r52 holds the
+						 * current value.
+						 */
+						location->sp_location =
+							SP_LOC_IN_R52;
+					}
+					sp_determined = true;
+				}
+			}
+		}
+
+		if (bt_has_iret(&bundle)) {
+			/* This is a terminating bundle. */
+			seen_terminating_bundle = true;
+			continue;
+		}
+
+		/*
+		 * Try to determine caller's PC.
+		 */
+
+		jrp_reg = -1;
+		has_jrp = bt_has_jrp(&bundle, &jrp_reg);
+		if (has_jrp)
+			seen_terminating_bundle = true;
+
+		if (location->pc_location == PC_LOC_UNKNOWN) {
+			if (has_jrp) {
+				if (jrp_reg == TREG_LR && !lr_modified) {
+					/* Looks like a leaf function, or else
+					 * lr is already restored. */
+					location->pc_location =
+						PC_LOC_IN_LR;
+				} else {
+					location->pc_location =
+						PC_LOC_ON_STACK;
+				}
+			} else if (bt_has_sw_sp_lr(&bundle)) {
+				/* In prolog, spilling initial lr to stack. */
+				location->pc_location = PC_LOC_IN_LR;
+			} else if (bt_modifies_lr(&bundle)) {
+				lr_modified = true;
+			}
+		}
+	}
+}
+
+void
+backtrace_init(BacktraceIterator *state,
+	       BacktraceMemoryReader read_memory_func,
+	       void *read_memory_func_extra,
+	       VirtualAddress pc, VirtualAddress lr,
+	       VirtualAddress sp, VirtualAddress r52)
+{
+	CallerLocation location;
+	VirtualAddress fp, initial_frame_caller_pc;
+
+	if (read_memory_func == NULL) {
+		read_memory_func = bt_read_memory;
+	}
+
+	/* Find out where we are in the initial frame. */
+	find_caller_pc_and_caller_sp(&location, pc,
+				     read_memory_func, read_memory_func_extra);
+
+	switch (location.sp_location) {
+	case SP_LOC_UNKNOWN:
+		/* Give up. */
+		fp = -1;
+		break;
+
+	case SP_LOC_IN_R52:
+		fp = r52;
+		break;
+
+	case SP_LOC_OFFSET:
+		fp = sp + location.sp_offset;
+		break;
+
+	default:
+		/* Give up. */
+		fp = -1;
+		break;
+	}
+
+	/* The frame pointer should theoretically be aligned mod 8. If
+	 * it's not even aligned mod 4 then something terrible happened
+	 * and we should mark it as invalid.
+	 */
+	if (fp % 4 != 0)
+		fp = -1;
+
+	/* -1 means "don't know initial_frame_caller_pc". */
+	initial_frame_caller_pc = -1;
+
+	switch (location.pc_location) {
+	case PC_LOC_UNKNOWN:
+		/* Give up. */
+		fp = -1;
+		break;
+
+	case PC_LOC_IN_LR:
+		if (lr == 0 || lr % TILE_BUNDLE_ALIGNMENT_IN_BYTES != 0) {
+			/* Give up. */
+			fp = -1;
+		} else {
+			initial_frame_caller_pc = lr;
+		}
+		break;
+
+	case PC_LOC_ON_STACK:
+		/* Leave initial_frame_caller_pc as -1,
+		 * meaning check the stack.
+		 */
+		break;
+
+	default:
+		/* Give up. */
+		fp = -1;
+		break;
+	}
+
+	state->pc = pc;
+	state->sp = sp;
+	state->fp = fp;
+	state->initial_frame_caller_pc = initial_frame_caller_pc;
+	state->read_memory_func = read_memory_func;
+	state->read_memory_func_extra = read_memory_func_extra;
+}
+
+bool
+backtrace_next(BacktraceIterator *state)
+{
+	VirtualAddress next_fp, next_pc, next_frame[2];
+
+	if (state->fp == -1) {
+		/* No parent frame. */
+		return false;
+	}
+
+	/* Try to read the frame linkage data chaining to the next function. */
+	if (!state->read_memory_func(&next_frame, state->fp, sizeof next_frame,
+				     state->read_memory_func_extra)) {
+		return false;
+	}
+
+	next_fp = next_frame[1];
+	if (next_fp % 4 != 0) {
+		/* Caller's frame pointer is suspect, so give up.
+		 * Technically it should be aligned mod 8, but we will
+		 * be forgiving here.
+		 */
+		return false;
+	}
+
+	if (state->initial_frame_caller_pc != -1) {
+		/* We must be in the initial stack frame and already know the
+		 * caller PC.
+		 */
+		next_pc = state->initial_frame_caller_pc;
+
+		/* Force reading stack next time, in case we were in the
+		 * initial frame.  We don't do this above just to paranoidly
+		 * avoid changing the struct at all when we return false.
+		 */
+		state->initial_frame_caller_pc = -1;
+	} else {
+		/* Get the caller PC from the frame linkage area. */
+		next_pc = next_frame[0];
+		if (next_pc == 0 ||
+		    next_pc % TILE_BUNDLE_ALIGNMENT_IN_BYTES != 0) {
+			/* The PC is suspect, so give up. */
+			return false;
+		}
+	}
+
+	/* Update state to become the caller's stack frame. */
+	state->pc = next_pc;
+	state->sp = state->fp;
+	state->fp = next_fp;
+
+	return true;
+}
+
+#else /* TILE_CHIP < 10 */
+
+void
+backtrace_init(BacktraceIterator *state,
+	       BacktraceMemoryReader read_memory_func,
+	       void *read_memory_func_extra,
+	       VirtualAddress pc, VirtualAddress lr,
+	       VirtualAddress sp, VirtualAddress r52)
+{
+	state->pc = pc;
+	state->sp = sp;
+	state->fp = -1;
+	state->initial_frame_caller_pc = -1;
+	state->read_memory_func = read_memory_func;
+	state->read_memory_func_extra = read_memory_func_extra;
+}
+
+bool backtrace_next(BacktraceIterator *state) { return false; }
+
+#endif /* TILE_CHIP < 10 */
diff --git a/arch/tile/kernel/compat.c b/arch/tile/kernel/compat.c
new file mode 100644
index 0000000..a374c99
--- /dev/null
+++ b/arch/tile/kernel/compat.c
@@ -0,0 +1,183 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+/* Adjust unistd.h to provide 32-bit numbers and functions. */
+#define __SYSCALL_COMPAT
+
+#include <linux/compat.h>
+#include <linux/msg.h>
+#include <linux/syscalls.h>
+#include <linux/kdev_t.h>
+#include <linux/fs.h>
+#include <linux/fcntl.h>
+#include <linux/smp_lock.h>
+#include <linux/uaccess.h>
+#include <linux/signal.h>
+#include <asm/syscalls.h>
+
+/*
+ * Syscalls that take 64-bit numbers traditionally take them in 32-bit
+ * "high" and "low" value parts on 32-bit architectures.
+ * In principle, one could imagine passing some register arguments as
+ * fully 64-bit on TILE-Gx in 32-bit mode, but it seems easier to
+ * adapt the usual convention.
+ */
+
+long compat_sys_truncate64(char __user *filename, u32 dummy, u32 low, u32 high)
+{
+	return sys_truncate(filename, ((loff_t)high << 32) | low);
+}
+
+long compat_sys_ftruncate64(unsigned int fd, u32 dummy, u32 low, u32 high)
+{
+	return sys_ftruncate(fd, ((loff_t)high << 32) | low);
+}
+
+long compat_sys_pread64(unsigned int fd, char __user *ubuf, size_t count,
+			u32 dummy, u32 low, u32 high)
+{
+	return sys_pread64(fd, ubuf, count, ((loff_t)high << 32) | low);
+}
+
+long compat_sys_pwrite64(unsigned int fd, char __user *ubuf, size_t count,
+			 u32 dummy, u32 low, u32 high)
+{
+	return sys_pwrite64(fd, ubuf, count, ((loff_t)high << 32) | low);
+}
+
+long compat_sys_lookup_dcookie(u32 low, u32 high, char __user *buf, size_t len)
+{
+	return sys_lookup_dcookie(((loff_t)high << 32) | low, buf, len);
+}
+
+long compat_sys_sync_file_range2(int fd, unsigned int flags,
+				 u32 offset_lo, u32 offset_hi,
+				 u32 nbytes_lo, u32 nbytes_hi)
+{
+	return sys_sync_file_range(fd, ((loff_t)offset_hi << 32) | offset_lo,
+				   ((loff_t)nbytes_hi << 32) | nbytes_lo,
+				   flags);
+}
+
+long compat_sys_fallocate(int fd, int mode,
+			  u32 offset_lo, u32 offset_hi,
+			  u32 len_lo, u32 len_hi)
+{
+	return sys_fallocate(fd, mode, ((loff_t)offset_hi << 32) | offset_lo,
+			     ((loff_t)len_hi << 32) | len_lo);
+}
+
+
+
+long compat_sys_sched_rr_get_interval(compat_pid_t pid,
+				      struct compat_timespec __user *interval)
+{
+	struct timespec t;
+	int ret;
+	mm_segment_t old_fs = get_fs();
+
+	set_fs(KERNEL_DS);
+	ret = sys_sched_rr_get_interval(pid, (struct timespec __user *)&t);
+	set_fs(old_fs);
+	if (put_compat_timespec(&t, interval))
+		return -EFAULT;
+	return ret;
+}
+
+ssize_t compat_sys_sendfile(int out_fd, int in_fd, compat_off_t __user *offset,
+			    size_t count)
+{
+	mm_segment_t old_fs = get_fs();
+	int ret;
+	off_t of;
+
+	if (offset && get_user(of, offset))
+		return -EFAULT;
+
+	set_fs(KERNEL_DS);
+	ret = sys_sendfile(out_fd, in_fd, offset ? (off_t __user *)&of : NULL,
+			   count);
+	set_fs(old_fs);
+
+	if (offset && put_user(of, offset))
+		return -EFAULT;
+	return ret;
+}
+
+
+/*
+ * The usual compat_sys_msgsnd() and _msgrcv() seem to be assuming
+ * some different calling convention than our normal 32-bit tile code.
+ */
+
+/* Already defined in ipc/compat.c, but we need it here. */
+struct compat_msgbuf {
+	compat_long_t mtype;
+	char mtext[1];
+};
+
+long tile_compat_sys_msgsnd(int msqid,
+			    struct compat_msgbuf __user *msgp,
+			    size_t msgsz, int msgflg)
+{
+	compat_long_t mtype;
+
+	if (get_user(mtype, &msgp->mtype))
+		return -EFAULT;
+	return do_msgsnd(msqid, mtype, msgp->mtext, msgsz, msgflg);
+}
+
+long tile_compat_sys_msgrcv(int msqid,
+			    struct compat_msgbuf __user *msgp,
+			    size_t msgsz, long msgtyp, int msgflg)
+{
+	long err, mtype;
+
+	err =  do_msgrcv(msqid, &mtype, msgp->mtext, msgsz, msgtyp, msgflg);
+	if (err < 0)
+		goto out;
+
+	if (put_user(mtype, &msgp->mtype))
+		err = -EFAULT;
+ out:
+	return err;
+}
+
+/* Provide the compat syscall number to call mapping. */
+#undef __SYSCALL
+#define __SYSCALL(nr, call) [nr] = (compat_##call),
+
+/* The generic versions of these don't work for Tile. */
+#define compat_sys_msgrcv tile_compat_sys_msgrcv
+#define compat_sys_msgsnd tile_compat_sys_msgsnd
+
+/* See comments in sys.c */
+#define compat_sys_fadvise64 sys32_fadvise64
+#define compat_sys_fadvise64_64 sys32_fadvise64_64
+#define compat_sys_readahead sys32_readahead
+#define compat_sys_sync_file_range compat_sys_sync_file_range2
+
+/* The native 64-bit "struct stat" matches the 32-bit "struct stat64". */
+#define compat_sys_stat64 sys_newstat
+#define compat_sys_lstat64 sys_newlstat
+#define compat_sys_fstat64 sys_newfstat
+#define compat_sys_fstatat64 sys_newfstatat
+
+/* Pass full 64-bit values through ptrace. */
+#define compat_sys_ptrace tile_compat_sys_ptrace
+
+void *compat_sys_call_table[__NR_syscalls] = {
+	[0 ... __NR_syscalls-1] = sys_ni_syscall,
+#include <asm/unistd.h>
+};
diff --git a/arch/tile/kernel/compat_signal.c b/arch/tile/kernel/compat_signal.c
new file mode 100644
index 0000000..9fa4ba8
--- /dev/null
+++ b/arch/tile/kernel/compat_signal.c
@@ -0,0 +1,433 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/kernel.h>
+#include <linux/signal.h>
+#include <linux/errno.h>
+#include <linux/wait.h>
+#include <linux/unistd.h>
+#include <linux/stddef.h>
+#include <linux/personality.h>
+#include <linux/suspend.h>
+#include <linux/ptrace.h>
+#include <linux/elf.h>
+#include <linux/compat.h>
+#include <linux/syscalls.h>
+#include <linux/uaccess.h>
+#include <asm/processor.h>
+#include <asm/ucontext.h>
+#include <asm/sigframe.h>
+#include <arch/interrupts.h>
+
+struct compat_sigaction {
+	compat_uptr_t sa_handler;
+	compat_ulong_t sa_flags;
+	compat_uptr_t sa_restorer;
+	sigset_t sa_mask;		/* mask last for extensibility */
+};
+
+struct compat_sigaltstack {
+	compat_uptr_t ss_sp;
+	int ss_flags;
+	compat_size_t ss_size;
+};
+
+struct compat_ucontext {
+	compat_ulong_t	  uc_flags;
+	compat_uptr_t     uc_link;
+	struct compat_sigaltstack	  uc_stack;
+	struct sigcontext uc_mcontext;
+	sigset_t	  uc_sigmask;	/* mask last for extensibility */
+};
+
+struct compat_siginfo {
+	int si_signo;
+	int si_errno;
+	int si_code;
+
+	union {
+		int _pad[SI_PAD_SIZE];
+
+		/* kill() */
+		struct {
+			unsigned int _pid;	/* sender's pid */
+			unsigned int _uid;	/* sender's uid */
+		} _kill;
+
+		/* POSIX.1b timers */
+		struct {
+			compat_timer_t _tid;	/* timer id */
+			int _overrun;		/* overrun count */
+			compat_sigval_t _sigval;	/* same as below */
+			int _sys_private;	/* not to be passed to user */
+			int _overrun_incr;	/* amount to add to overrun */
+		} _timer;
+
+		/* POSIX.1b signals */
+		struct {
+			unsigned int _pid;	/* sender's pid */
+			unsigned int _uid;	/* sender's uid */
+			compat_sigval_t _sigval;
+		} _rt;
+
+		/* SIGCHLD */
+		struct {
+			unsigned int _pid;	/* which child */
+			unsigned int _uid;	/* sender's uid */
+			int _status;		/* exit code */
+			compat_clock_t _utime;
+			compat_clock_t _stime;
+		} _sigchld;
+
+		/* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
+		struct {
+			unsigned int _addr;	/* faulting insn/memory ref. */
+#ifdef __ARCH_SI_TRAPNO
+			int _trapno;	/* TRAP # which caused the signal */
+#endif
+		} _sigfault;
+
+		/* SIGPOLL */
+		struct {
+			int _band;	/* POLL_IN, POLL_OUT, POLL_MSG */
+			int _fd;
+		} _sigpoll;
+	} _sifields;
+};
+
+struct compat_rt_sigframe {
+	unsigned char save_area[C_ABI_SAVE_AREA_SIZE]; /* caller save area */
+	struct compat_siginfo info;
+	struct compat_ucontext uc;
+};
+
+#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
+
+long compat_sys_rt_sigaction(int sig, struct compat_sigaction __user *act,
+			     struct compat_sigaction __user *oact,
+			     size_t sigsetsize)
+{
+	struct k_sigaction new_sa, old_sa;
+	int ret = -EINVAL;
+
+	/* XXX: Don't preclude handling different sized sigset_t's.  */
+	if (sigsetsize != sizeof(sigset_t))
+		goto out;
+
+	if (act) {
+		compat_uptr_t handler, restorer;
+
+		if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
+		    __get_user(handler, &act->sa_handler) ||
+		    __get_user(new_sa.sa.sa_flags, &act->sa_flags) ||
+		    __get_user(restorer, &act->sa_restorer) ||
+		    __copy_from_user(&new_sa.sa.sa_mask, &act->sa_mask,
+				     sizeof(sigset_t)))
+			return -EFAULT;
+		new_sa.sa.sa_handler = compat_ptr(handler);
+		new_sa.sa.sa_restorer = compat_ptr(restorer);
+	}
+
+	ret = do_sigaction(sig, act ? &new_sa : NULL, oact ? &old_sa : NULL);
+
+	if (!ret && oact) {
+		if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
+		    __put_user(ptr_to_compat(old_sa.sa.sa_handler),
+			       &oact->sa_handler) ||
+		    __put_user(ptr_to_compat(old_sa.sa.sa_restorer),
+			       &oact->sa_restorer) ||
+		    __put_user(old_sa.sa.sa_flags, &oact->sa_flags) ||
+		    __copy_to_user(&oact->sa_mask, &old_sa.sa.sa_mask,
+				   sizeof(sigset_t)))
+			return -EFAULT;
+	}
+out:
+	return ret;
+}
+
+long compat_sys_rt_sigqueueinfo(int pid, int sig,
+				struct compat_siginfo __user *uinfo)
+{
+	siginfo_t info;
+	int ret;
+	mm_segment_t old_fs = get_fs();
+
+	if (copy_siginfo_from_user32(&info, uinfo))
+		return -EFAULT;
+	set_fs(KERNEL_DS);
+	ret = sys_rt_sigqueueinfo(pid, sig, (siginfo_t __user *)&info);
+	set_fs(old_fs);
+	return ret;
+}
+
+int copy_siginfo_to_user32(struct compat_siginfo __user *to, siginfo_t *from)
+{
+	int err;
+
+	if (!access_ok(VERIFY_WRITE, to, sizeof(struct compat_siginfo)))
+		return -EFAULT;
+
+	/* If you change siginfo_t structure, please make sure that
+	   this code is fixed accordingly.
+	   It should never copy any pad contained in the structure
+	   to avoid security leaks, but must copy the generic
+	   3 ints plus the relevant union member.  */
+	err = __put_user(from->si_signo, &to->si_signo);
+	err |= __put_user(from->si_errno, &to->si_errno);
+	err |= __put_user((short)from->si_code, &to->si_code);
+
+	if (from->si_code < 0) {
+		err |= __put_user(from->si_pid, &to->si_pid);
+		err |= __put_user(from->si_uid, &to->si_uid);
+		err |= __put_user(ptr_to_compat(from->si_ptr), &to->si_ptr);
+	} else {
+		/*
+		 * First 32bits of unions are always present:
+		 * si_pid === si_band === si_tid === si_addr(LS half)
+		 */
+		err |= __put_user(from->_sifields._pad[0],
+				  &to->_sifields._pad[0]);
+		switch (from->si_code >> 16) {
+		case __SI_FAULT >> 16:
+			break;
+		case __SI_CHLD >> 16:
+			err |= __put_user(from->si_utime, &to->si_utime);
+			err |= __put_user(from->si_stime, &to->si_stime);
+			err |= __put_user(from->si_status, &to->si_status);
+			/* FALL THROUGH */
+		default:
+		case __SI_KILL >> 16:
+			err |= __put_user(from->si_uid, &to->si_uid);
+			break;
+		case __SI_POLL >> 16:
+			err |= __put_user(from->si_fd, &to->si_fd);
+			break;
+		case __SI_TIMER >> 16:
+			err |= __put_user(from->si_overrun, &to->si_overrun);
+			err |= __put_user(ptr_to_compat(from->si_ptr),
+					  &to->si_ptr);
+			break;
+			 /* This is not generated by the kernel as of now.  */
+		case __SI_RT >> 16:
+		case __SI_MESGQ >> 16:
+			err |= __put_user(from->si_uid, &to->si_uid);
+			err |= __put_user(from->si_int, &to->si_int);
+			break;
+		}
+	}
+	return err;
+}
+
+int copy_siginfo_from_user32(siginfo_t *to, struct compat_siginfo __user *from)
+{
+	int err;
+	u32 ptr32;
+
+	if (!access_ok(VERIFY_READ, from, sizeof(struct compat_siginfo)))
+		return -EFAULT;
+
+	err = __get_user(to->si_signo, &from->si_signo);
+	err |= __get_user(to->si_errno, &from->si_errno);
+	err |= __get_user(to->si_code, &from->si_code);
+
+	err |= __get_user(to->si_pid, &from->si_pid);
+	err |= __get_user(to->si_uid, &from->si_uid);
+	err |= __get_user(ptr32, &from->si_ptr);
+	to->si_ptr = compat_ptr(ptr32);
+
+	return err;
+}
+
+long _compat_sys_sigaltstack(const struct compat_sigaltstack __user *uss_ptr,
+			     struct compat_sigaltstack __user *uoss_ptr,
+			     struct pt_regs *regs)
+{
+	stack_t uss, uoss;
+	int ret;
+	mm_segment_t seg;
+
+	if (uss_ptr) {
+		u32 ptr;
+
+		memset(&uss, 0, sizeof(stack_t));
+		if (!access_ok(VERIFY_READ, uss_ptr, sizeof(*uss_ptr)) ||
+			    __get_user(ptr, &uss_ptr->ss_sp) ||
+			    __get_user(uss.ss_flags, &uss_ptr->ss_flags) ||
+			    __get_user(uss.ss_size, &uss_ptr->ss_size))
+			return -EFAULT;
+		uss.ss_sp = compat_ptr(ptr);
+	}
+	seg = get_fs();
+	set_fs(KERNEL_DS);
+	ret = do_sigaltstack(uss_ptr ? &uss : NULL, &uoss,
+			     (unsigned long)compat_ptr(regs->sp));
+	set_fs(seg);
+	if (ret >= 0 && uoss_ptr)  {
+		if (!access_ok(VERIFY_WRITE, uoss_ptr, sizeof(*uoss_ptr)) ||
+		    __put_user(ptr_to_compat(uoss.ss_sp), &uoss_ptr->ss_sp) ||
+		    __put_user(uoss.ss_flags, &uoss_ptr->ss_flags) ||
+		    __put_user(uoss.ss_size, &uoss_ptr->ss_size))
+			ret = -EFAULT;
+	}
+	return ret;
+}
+
+long _compat_sys_rt_sigreturn(struct pt_regs *regs)
+{
+	struct compat_rt_sigframe __user *frame =
+		(struct compat_rt_sigframe __user *) compat_ptr(regs->sp);
+	sigset_t set;
+	long r0;
+
+	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
+		goto badframe;
+	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
+		goto badframe;
+
+	sigdelsetmask(&set, ~_BLOCKABLE);
+	spin_lock_irq(&current->sighand->siglock);
+	current->blocked = set;
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
+
+	if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &r0))
+		goto badframe;
+
+	if (_compat_sys_sigaltstack(&frame->uc.uc_stack, NULL, regs) != 0)
+		goto badframe;
+
+	return r0;
+
+badframe:
+	force_sig(SIGSEGV, current);
+	return 0;
+}
+
+/*
+ * Determine which stack to use..
+ */
+static inline void __user *compat_get_sigframe(struct k_sigaction *ka,
+					       struct pt_regs *regs,
+					       size_t frame_size)
+{
+	unsigned long sp;
+
+	/* Default to using normal stack */
+	sp = (unsigned long)compat_ptr(regs->sp);
+
+	/*
+	 * If we are on the alternate signal stack and would overflow
+	 * it, don't.  Return an always-bogus address instead so we
+	 * will die with SIGSEGV.
+	 */
+	if (on_sig_stack(sp) && !likely(on_sig_stack(sp - frame_size)))
+		return (void __user *) -1L;
+
+	/* This is the X/Open sanctioned signal stack switching.  */
+	if (ka->sa.sa_flags & SA_ONSTACK) {
+		if (sas_ss_flags(sp) == 0)
+			sp = current->sas_ss_sp + current->sas_ss_size;
+	}
+
+	sp -= frame_size;
+	/*
+	 * Align the stack pointer according to the TILE ABI,
+	 * i.e. so that on function entry (sp & 15) == 0.
+	 */
+	sp &= -16UL;
+	return (void __user *) sp;
+}
+
+int compat_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+			  sigset_t *set, struct pt_regs *regs)
+{
+	unsigned long restorer;
+	struct compat_rt_sigframe __user *frame;
+	int err = 0;
+	int usig;
+
+	frame = compat_get_sigframe(ka, regs, sizeof(*frame));
+
+	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
+		goto give_sigsegv;
+
+	usig = current_thread_info()->exec_domain
+		&& current_thread_info()->exec_domain->signal_invmap
+		&& sig < 32
+		? current_thread_info()->exec_domain->signal_invmap[sig]
+		: sig;
+
+	/* Always write at least the signal number for the stack backtracer. */
+	if (ka->sa.sa_flags & SA_SIGINFO) {
+		/* At sigreturn time, restore the callee-save registers too. */
+		err |= copy_siginfo_to_user32(&frame->info, info);
+		regs->flags |= PT_FLAGS_RESTORE_REGS;
+	} else {
+		err |= __put_user(info->si_signo, &frame->info.si_signo);
+	}
+
+	/* Create the ucontext.  */
+	err |= __clear_user(&frame->save_area, sizeof(frame->save_area));
+	err |= __put_user(0, &frame->uc.uc_flags);
+	err |= __put_user(0, &frame->uc.uc_link);
+	err |= __put_user(ptr_to_compat((void *)(current->sas_ss_sp)),
+			  &frame->uc.uc_stack.ss_sp);
+	err |= __put_user(sas_ss_flags(regs->sp),
+			  &frame->uc.uc_stack.ss_flags);
+	err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
+	err |= setup_sigcontext(&frame->uc.uc_mcontext, regs);
+	err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
+	if (err)
+		goto give_sigsegv;
+
+	restorer = VDSO_BASE;
+	if (ka->sa.sa_flags & SA_RESTORER)
+		restorer = ptr_to_compat_reg(ka->sa.sa_restorer);
+
+	/*
+	 * Set up registers for signal handler.
+	 * Registers that we don't modify keep the value they had from
+	 * user-space at the time we took the signal.
+	 */
+	regs->pc = ptr_to_compat_reg(ka->sa.sa_handler);
+	regs->ex1 = PL_ICS_EX1(USER_PL, 1); /* set crit sec in handler */
+	regs->sp = ptr_to_compat_reg(frame);
+	regs->lr = restorer;
+	regs->regs[0] = (unsigned long) usig;
+
+	if (ka->sa.sa_flags & SA_SIGINFO) {
+		/* Need extra arguments, so mark to restore caller-saves. */
+		regs->regs[1] = ptr_to_compat_reg(&frame->info);
+		regs->regs[2] = ptr_to_compat_reg(&frame->uc);
+		regs->flags |= PT_FLAGS_CALLER_SAVES;
+	}
+
+	/*
+	 * Notify any tracer that was single-stepping it.
+	 * The tracer may want to single-step inside the
+	 * handler too.
+	 */
+	if (test_thread_flag(TIF_SINGLESTEP))
+		ptrace_notify(SIGTRAP);
+
+	return 0;
+
+give_sigsegv:
+	force_sigsegv(sig, current);
+	return -EFAULT;
+}
diff --git a/arch/tile/kernel/early_printk.c b/arch/tile/kernel/early_printk.c
new file mode 100644
index 0000000..e44d441
--- /dev/null
+++ b/arch/tile/kernel/early_printk.c
@@ -0,0 +1,109 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#include <linux/console.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/string.h>
+#include <asm/setup.h>
+#include <hv/hypervisor.h>
+
+static void early_hv_write(struct console *con, const char *s, unsigned n)
+{
+	hv_console_write((HV_VirtAddr) s, n);
+}
+
+static struct console early_hv_console = {
+	.name =		"earlyhv",
+	.write =	early_hv_write,
+	.flags =	CON_PRINTBUFFER,
+	.index =	-1,
+};
+
+/* Direct interface for emergencies */
+struct console *early_console = &early_hv_console;
+static int early_console_initialized;
+static int early_console_complete;
+
+static void early_vprintk(const char *fmt, va_list ap)
+{
+	char buf[512];
+	int n = vscnprintf(buf, sizeof(buf), fmt, ap);
+	early_console->write(early_console, buf, n);
+}
+
+void early_printk(const char *fmt, ...)
+{
+	va_list ap;
+	va_start(ap, fmt);
+	early_vprintk(fmt, ap);
+	va_end(ap);
+}
+
+void early_panic(const char *fmt, ...)
+{
+	va_list ap;
+	raw_local_irq_disable_all();
+	va_start(ap, fmt);
+	early_printk("Kernel panic - not syncing: ");
+	early_vprintk(fmt, ap);
+	early_console->write(early_console, "\n", 1);
+	va_end(ap);
+	dump_stack();
+	hv_halt();
+}
+
+static int __initdata keep_early;
+
+static int __init setup_early_printk(char *str)
+{
+	if (early_console_initialized)
+		return 1;
+
+	if (str != NULL && strncmp(str, "keep", 4) == 0)
+		keep_early = 1;
+
+	early_console = &early_hv_console;
+	early_console_initialized = 1;
+	register_console(early_console);
+
+	return 0;
+}
+
+void __init disable_early_printk(void)
+{
+	early_console_complete = 1;
+	if (!early_console_initialized || !early_console)
+		return;
+	if (!keep_early) {
+		early_printk("disabling early console\n");
+		unregister_console(early_console);
+		early_console_initialized = 0;
+	} else {
+		early_printk("keeping early console\n");
+	}
+}
+
+void warn_early_printk(void)
+{
+	if (early_console_complete || early_console_initialized)
+		return;
+	early_printk("\
+Machine shutting down before console output is fully initialized.\n\
+You may wish to reboot and add the option 'earlyprintk' to your\n\
+boot command line to see any diagnostic early console output.\n\
+");
+}
+
+early_param("earlyprintk", setup_early_printk);
diff --git a/arch/tile/kernel/entry.S b/arch/tile/kernel/entry.S
new file mode 100644
index 0000000..136261f
--- /dev/null
+++ b/arch/tile/kernel/entry.S
@@ -0,0 +1,141 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#include <linux/linkage.h>
+#include <arch/abi.h>
+#include <asm/unistd.h>
+#include <asm/irqflags.h>
+
+#ifdef __tilegx__
+#define bnzt bnezt
+#endif
+
+STD_ENTRY(current_text_addr)
+	{ move r0, lr; jrp lr }
+	STD_ENDPROC(current_text_addr)
+
+STD_ENTRY(_sim_syscall)
+	/*
+	 * Wait for r0-r9 to be ready (and lr on the off chance we
+	 * want the syscall to locate its caller), then make a magic
+	 * simulator syscall.
+	 *
+	 * We carefully stall until the registers are readable in case they
+	 * are the target of a slow load, etc. so that tile-sim will
+	 * definitely be able to read all of them inside the magic syscall.
+	 *
+	 * Technically this is wrong for r3-r9 and lr, since an interrupt
+	 * could come in and restore the registers with a slow load right
+	 * before executing the mtspr. We may need to modify tile-sim to
+	 * explicitly stall for this case, but we do not yet have
+	 * a way to implement such a stall.
+	 */
+	{ and zero, lr, r9 ; and zero, r8, r7 }
+	{ and zero, r6, r5 ; and zero, r4, r3 }
+	{ and zero, r2, r1 ; mtspr SIM_CONTROL, r0 }
+	{ jrp lr }
+	STD_ENDPROC(_sim_syscall)
+
+/*
+ * Implement execve().  The i386 code has a note that forking from kernel
+ * space results in no copy on write until the execve, so we should be
+ * careful not to write to the stack here.
+ */
+STD_ENTRY(kernel_execve)
+	moveli TREG_SYSCALL_NR_NAME, __NR_execve
+	swint1
+	jrp lr
+	STD_ENDPROC(kernel_execve)
+
+/* Delay a fixed number of cycles. */
+STD_ENTRY(__delay)
+	{ addi r0, r0, -1; bnzt r0, . }
+	jrp lr
+	STD_ENDPROC(__delay)
+
+/*
+ * We don't run this function directly, but instead copy it to a page
+ * we map into every user process.  See vdso_setup().
+ *
+ * Note that libc has a copy of this function that it uses to compare
+ * against the PC when a stack backtrace ends, so if this code is
+ * changed, the libc implementation(s) should also be updated.
+ */
+	.pushsection .data
+ENTRY(__rt_sigreturn)
+	moveli TREG_SYSCALL_NR_NAME,__NR_rt_sigreturn
+	swint1
+	ENDPROC(__rt_sigreturn)
+	ENTRY(__rt_sigreturn_end)
+	.popsection
+
+STD_ENTRY(dump_stack)
+	{ move r2, lr; lnk r1 }
+	{ move r4, r52; addli r1, r1, dump_stack - . }
+	{ move r3, sp; j _dump_stack }
+	jrp lr   /* keep backtracer happy */
+	STD_ENDPROC(dump_stack)
+
+STD_ENTRY(KBacktraceIterator_init_current)
+	{ move r2, lr; lnk r1 }
+	{ move r4, r52; addli r1, r1, KBacktraceIterator_init_current - . }
+	{ move r3, sp; j _KBacktraceIterator_init_current }
+	jrp lr   /* keep backtracer happy */
+	STD_ENDPROC(KBacktraceIterator_init_current)
+
+/*
+ * Reset our stack to r1/r2 (sp and ksp0+cpu respectively), then
+ * free the old stack (passed in r0) and re-invoke cpu_idle().
+ * We update sp and ksp0 simultaneously to avoid backtracer warnings.
+ */
+STD_ENTRY(cpu_idle_on_new_stack)
+	{
+	 move sp, r1
+	 mtspr SYSTEM_SAVE_1_0, r2
+	}
+	jal free_thread_info
+	j cpu_idle
+	STD_ENDPROC(cpu_idle_on_new_stack)
+
+/* Loop forever on a nap during SMP boot. */
+STD_ENTRY(smp_nap)
+	nap
+	j smp_nap /* we are not architecturally guaranteed not to exit nap */
+	jrp lr    /* clue in the backtracer */
+	STD_ENDPROC(smp_nap)
+
+/*
+ * Enable interrupts racelessly and then nap until interrupted.
+ * This function's _cpu_idle_nap address is special; see intvec.S.
+ * When interrupted at _cpu_idle_nap, we bump the PC forward 8, and
+ * as a result return to the function that called _cpu_idle().
+ */
+STD_ENTRY(_cpu_idle)
+	{
+	 lnk r0
+	 movei r1, 1
+	}
+	{
+	 addli r0, r0, _cpu_idle_nap - .
+	 mtspr INTERRUPT_CRITICAL_SECTION, r1
+	}
+	IRQ_ENABLE(r2, r3)         /* unmask, but still with ICS set */
+	mtspr EX_CONTEXT_1_1, r1   /* PL1, ICS clear */
+	mtspr EX_CONTEXT_1_0, r0
+	iret
+	.global _cpu_idle_nap
+_cpu_idle_nap:
+	nap
+	jrp lr
+	STD_ENDPROC(_cpu_idle)
diff --git a/arch/tile/kernel/head_32.S b/arch/tile/kernel/head_32.S
new file mode 100644
index 0000000..2b4f6c0
--- /dev/null
+++ b/arch/tile/kernel/head_32.S
@@ -0,0 +1,180 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ * TILE startup code.
+ */
+
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/thread_info.h>
+#include <asm/processor.h>
+#include <asm/asm-offsets.h>
+#include <hv/hypervisor.h>
+#include <arch/chip.h>
+
+/*
+ * This module contains the entry code for kernel images. It performs the
+ * minimal setup needed to call the generic C routines.
+ */
+
+	__HEAD
+ENTRY(_start)
+	/* Notify the hypervisor of what version of the API we want */
+	{
+	  movei r1, TILE_CHIP
+	  movei r2, TILE_CHIP_REV
+	}
+	{
+	  moveli r0, _HV_VERSION
+	  jal hv_init
+	}
+	/* Get a reasonable default ASID in r0 */
+	{
+	  move r0, zero
+	  jal hv_inquire_asid
+	}
+	/* Install the default page table */
+	{
+	  moveli r6, lo16(swapper_pgprot - PAGE_OFFSET)
+	  move r4, r0     /* use starting ASID of range for this page table */
+	}
+	{
+	  moveli r0, lo16(swapper_pg_dir - PAGE_OFFSET)
+	  auli r6, r6, ha16(swapper_pgprot - PAGE_OFFSET)
+	}
+	{
+	  lw r2, r6
+	  addi r6, r6, 4
+	}
+	{
+	  lw r3, r6
+	  auli r0, r0, ha16(swapper_pg_dir - PAGE_OFFSET)
+	}
+	{
+	  inv r6
+	  move r1, zero   /* high 32 bits of CPA is zero */
+	}
+	{
+	  moveli lr, lo16(1f)
+	  move r5, zero
+	}
+	{
+	  auli lr, lr, ha16(1f)
+	  j hv_install_context
+	}
+1:
+
+	/* Get our processor number and save it away in SAVE_1_0. */
+	jal hv_inquire_topology
+	mulll_uu r4, r1, r2        /* r1 == y, r2 == width */
+	add r4, r4, r0             /* r0 == x, so r4 == cpu == y*width + x */
+
+#ifdef CONFIG_SMP
+	/*
+	 * Load up our per-cpu offset.  When the first (master) tile
+	 * boots, this value is still zero, so we will load boot_pc
+	 * with start_kernel, and boot_sp with init_stack + THREAD_SIZE.
+	 * The master tile initializes the per-cpu offset array, so that
+	 * when subsequent (secondary) tiles boot, they will instead load
+	 * from their per-cpu versions of boot_sp and boot_pc.
+	 */
+	moveli r5, lo16(__per_cpu_offset)
+	auli r5, r5, ha16(__per_cpu_offset)
+	s2a r5, r4, r5
+	lw r5, r5
+	bnz r5, 1f
+
+	/*
+	 * Save the width and height to the smp_topology variable
+	 * for later use.
+	 */
+	moveli r0, lo16(smp_topology + HV_TOPOLOGY_WIDTH_OFFSET)
+	auli r0, r0, ha16(smp_topology + HV_TOPOLOGY_WIDTH_OFFSET)
+	{
+	  sw r0, r2
+	  addi r0, r0, (HV_TOPOLOGY_HEIGHT_OFFSET - HV_TOPOLOGY_WIDTH_OFFSET)
+	}
+	sw r0, r3
+1:
+#else
+	move r5, zero
+#endif
+
+	/* Load and go with the correct pc and sp. */
+	{
+	  addli r1, r5, lo16(boot_sp)
+	  addli r0, r5, lo16(boot_pc)
+	}
+	{
+	  auli r1, r1, ha16(boot_sp)
+	  auli r0, r0, ha16(boot_pc)
+	}
+	lw r0, r0
+	lw sp, r1
+	or r4, sp, r4
+	mtspr SYSTEM_SAVE_1_0, r4  /* save ksp0 + cpu */
+	addi sp, sp, -STACK_TOP_DELTA
+	{
+	  move lr, zero   /* stop backtraces in the called function */
+	  jr r0
+	}
+	ENDPROC(_start)
+
+.section ".bss.page_aligned","w"
+	.align PAGE_SIZE
+ENTRY(empty_zero_page)
+	.fill PAGE_SIZE,1,0
+	END(empty_zero_page)
+
+	.macro PTE va, cpa, bits1, no_org=0
+	.ifeq \no_org
+	.org swapper_pg_dir + HV_L1_INDEX(\va) * HV_PTE_SIZE
+	.endif
+	.word HV_PTE_PAGE | HV_PTE_DIRTY | HV_PTE_PRESENT | HV_PTE_ACCESSED | \
+	      (HV_PTE_MODE_CACHE_NO_L3 << HV_PTE_INDEX_MODE)
+	.word (\bits1) | (HV_CPA_TO_PFN(\cpa) << HV_PTE_INDEX_PFN)
+	.endm
+
+.section ".data.page_aligned","wa"
+	.align PAGE_SIZE
+ENTRY(swapper_pg_dir)
+	/*
+	 * All data pages from PAGE_OFFSET to MEM_USER_INTRPT are mapped as
+	 * VA = PA + PAGE_OFFSET.  We remap things with more precise access
+	 * permissions and more respect for size of RAM later.
+	 */
+	.set addr, 0
+	.rept (MEM_USER_INTRPT - PAGE_OFFSET) >> PGDIR_SHIFT
+	PTE addr + PAGE_OFFSET, addr, HV_PTE_READABLE | HV_PTE_WRITABLE
+	.set addr, addr + PGDIR_SIZE
+	.endr
+
+	/* The true text VAs are mapped as VA = PA + MEM_SV_INTRPT */
+	PTE MEM_SV_INTRPT, 0, HV_PTE_READABLE | HV_PTE_EXECUTABLE
+	.org swapper_pg_dir + HV_L1_SIZE
+	END(swapper_pg_dir)
+
+	/*
+	 * Isolate swapper_pgprot to its own cache line, since each cpu
+	 * starting up will read it using VA-is-PA and local homing.
+	 * This would otherwise likely conflict with other data on the cache
+	 * line, once we have set its permanent home in the page tables.
+	 */
+	__INITDATA
+	.align CHIP_L2_LINE_SIZE()
+ENTRY(swapper_pgprot)
+	PTE	0, 0, HV_PTE_READABLE | HV_PTE_WRITABLE, 1
+	.align CHIP_L2_LINE_SIZE()
+	END(swapper_pgprot)
diff --git a/arch/tile/kernel/hvglue.lds b/arch/tile/kernel/hvglue.lds
new file mode 100644
index 0000000..698489b
--- /dev/null
+++ b/arch/tile/kernel/hvglue.lds
@@ -0,0 +1,56 @@
+/* Hypervisor call vector addresses; see <hv/hypervisor.h> */
+hv_init = TEXT_OFFSET + 0x10020;
+hv_install_context = TEXT_OFFSET + 0x10040;
+hv_sysconf = TEXT_OFFSET + 0x10060;
+hv_get_rtc = TEXT_OFFSET + 0x10080;
+hv_set_rtc = TEXT_OFFSET + 0x100a0;
+hv_flush_asid = TEXT_OFFSET + 0x100c0;
+hv_flush_page = TEXT_OFFSET + 0x100e0;
+hv_flush_pages = TEXT_OFFSET + 0x10100;
+hv_restart = TEXT_OFFSET + 0x10120;
+hv_halt = TEXT_OFFSET + 0x10140;
+hv_power_off = TEXT_OFFSET + 0x10160;
+hv_inquire_physical = TEXT_OFFSET + 0x10180;
+hv_inquire_memory_controller = TEXT_OFFSET + 0x101a0;
+hv_inquire_virtual = TEXT_OFFSET + 0x101c0;
+hv_inquire_asid = TEXT_OFFSET + 0x101e0;
+hv_nanosleep = TEXT_OFFSET + 0x10200;
+hv_console_read_if_ready = TEXT_OFFSET + 0x10220;
+hv_console_write = TEXT_OFFSET + 0x10240;
+hv_downcall_dispatch = TEXT_OFFSET + 0x10260;
+hv_inquire_topology = TEXT_OFFSET + 0x10280;
+hv_fs_findfile = TEXT_OFFSET + 0x102a0;
+hv_fs_fstat = TEXT_OFFSET + 0x102c0;
+hv_fs_pread = TEXT_OFFSET + 0x102e0;
+hv_physaddr_read64 = TEXT_OFFSET + 0x10300;
+hv_physaddr_write64 = TEXT_OFFSET + 0x10320;
+hv_get_command_line = TEXT_OFFSET + 0x10340;
+hv_set_caching = TEXT_OFFSET + 0x10360;
+hv_bzero_page = TEXT_OFFSET + 0x10380;
+hv_register_message_state = TEXT_OFFSET + 0x103a0;
+hv_send_message = TEXT_OFFSET + 0x103c0;
+hv_receive_message = TEXT_OFFSET + 0x103e0;
+hv_inquire_context = TEXT_OFFSET + 0x10400;
+hv_start_all_tiles = TEXT_OFFSET + 0x10420;
+hv_dev_open = TEXT_OFFSET + 0x10440;
+hv_dev_close = TEXT_OFFSET + 0x10460;
+hv_dev_pread = TEXT_OFFSET + 0x10480;
+hv_dev_pwrite = TEXT_OFFSET + 0x104a0;
+hv_dev_poll = TEXT_OFFSET + 0x104c0;
+hv_dev_poll_cancel = TEXT_OFFSET + 0x104e0;
+hv_dev_preada = TEXT_OFFSET + 0x10500;
+hv_dev_pwritea = TEXT_OFFSET + 0x10520;
+hv_flush_remote = TEXT_OFFSET + 0x10540;
+hv_console_putc = TEXT_OFFSET + 0x10560;
+hv_inquire_tiles = TEXT_OFFSET + 0x10580;
+hv_confstr = TEXT_OFFSET + 0x105a0;
+hv_reexec = TEXT_OFFSET + 0x105c0;
+hv_set_command_line = TEXT_OFFSET + 0x105e0;
+hv_dev_register_intr_state = TEXT_OFFSET + 0x10600;
+hv_enable_intr = TEXT_OFFSET + 0x10620;
+hv_disable_intr = TEXT_OFFSET + 0x10640;
+hv_trigger_ipi = TEXT_OFFSET + 0x10660;
+hv_store_mapping = TEXT_OFFSET + 0x10680;
+hv_inquire_realpa = TEXT_OFFSET + 0x106a0;
+hv_flush_all = TEXT_OFFSET + 0x106c0;
+hv_glue_internals = TEXT_OFFSET + 0x106e0;
diff --git a/arch/tile/kernel/init_task.c b/arch/tile/kernel/init_task.c
new file mode 100644
index 0000000..928b318
--- /dev/null
+++ b/arch/tile/kernel/init_task.c
@@ -0,0 +1,59 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/init_task.h>
+#include <linux/mqueue.h>
+#include <linux/module.h>
+#include <linux/start_kernel.h>
+#include <linux/uaccess.h>
+
+static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
+static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
+
+/*
+ * Initial thread structure.
+ *
+ * We need to make sure that this is THREAD_SIZE aligned due to the
+ * way process stacks are handled. This is done by having a special
+ * "init_task" linker map entry..
+ */
+union thread_union init_thread_union __init_task_data = {
+	INIT_THREAD_INFO(init_task)
+};
+
+/*
+ * Initial task structure.
+ *
+ * All other task structs will be allocated on slabs in fork.c
+ */
+struct task_struct init_task = INIT_TASK(init_task);
+EXPORT_SYMBOL(init_task);
+
+/*
+ * per-CPU stack and boot info.
+ */
+DEFINE_PER_CPU(unsigned long, boot_sp) =
+	(unsigned long)init_stack + THREAD_SIZE;
+
+#ifdef CONFIG_SMP
+DEFINE_PER_CPU(unsigned long, boot_pc) = (unsigned long)start_kernel;
+#else
+/*
+ * The variable must be __initdata since it references __init code.
+ * With CONFIG_SMP it is per-cpu data, which is exempt from validation.
+ */
+unsigned long __initdata boot_pc = (unsigned long)start_kernel;
+#endif
diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S
new file mode 100644
index 0000000..207271f
--- /dev/null
+++ b/arch/tile/kernel/intvec_32.S
@@ -0,0 +1,2006 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ * Linux interrupt vectors.
+ */
+
+#include <linux/linkage.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <asm/ptrace.h>
+#include <asm/thread_info.h>
+#include <asm/unistd.h>
+#include <asm/irqflags.h>
+#include <asm/atomic.h>
+#include <asm/asm-offsets.h>
+#include <hv/hypervisor.h>
+#include <arch/abi.h>
+#include <arch/interrupts.h>
+#include <arch/spr_def.h>
+
+#ifdef CONFIG_PREEMPT
+# error "No support for kernel preemption currently"
+#endif
+
+#if INT_INTCTRL_1 < 32 || INT_INTCTL_1 >= 48
+# error INT_INTCTRL_1 coded to set high interrupt mask
+#endif
+
+#define PTREGS_PTR(reg, ptreg) addli reg, sp, C_ABI_SAVE_AREA_SIZE + (ptreg)
+
+#define PTREGS_OFFSET_SYSCALL PTREGS_OFFSET_REG(TREG_SYSCALL_NR)
+
+#if !CHIP_HAS_WH64()
+	/* By making this an empty macro, we can use wh64 in the code. */
+	.macro  wh64 reg
+	.endm
+#endif
+
+	.macro  push_reg reg, ptr=sp, delta=-4
+	{
+	 sw     \ptr, \reg
+	 addli  \ptr, \ptr, \delta
+	}
+	.endm
+
+	.macro  pop_reg reg, ptr=sp, delta=4
+	{
+	 lw     \reg, \ptr
+	 addli  \ptr, \ptr, \delta
+	}
+	.endm
+
+	.macro  pop_reg_zero reg, zreg, ptr=sp, delta=4
+	{
+	 move   \zreg, zero
+	 lw     \reg, \ptr
+	 addi   \ptr, \ptr, \delta
+	}
+	.endm
+
+	.macro  push_extra_callee_saves reg
+	PTREGS_PTR(\reg, PTREGS_OFFSET_REG(51))
+	push_reg r51, \reg
+	push_reg r50, \reg
+	push_reg r49, \reg
+	push_reg r48, \reg
+	push_reg r47, \reg
+	push_reg r46, \reg
+	push_reg r45, \reg
+	push_reg r44, \reg
+	push_reg r43, \reg
+	push_reg r42, \reg
+	push_reg r41, \reg
+	push_reg r40, \reg
+	push_reg r39, \reg
+	push_reg r38, \reg
+	push_reg r37, \reg
+	push_reg r36, \reg
+	push_reg r35, \reg
+	push_reg r34, \reg, PTREGS_OFFSET_BASE - PTREGS_OFFSET_REG(34)
+	.endm
+
+	.macro  panic str
+	.pushsection .rodata, "a"
+1:
+	.asciz  "\str"
+	.popsection
+	{
+	 moveli r0, lo16(1b)
+	}
+	{
+	 auli   r0, r0, ha16(1b)
+	 jal    panic
+	}
+	.endm
+
+#ifdef __COLLECT_LINKER_FEEDBACK__
+	.pushsection .text.intvec_feedback,"ax"
+intvec_feedback:
+	.popsection
+#endif
+
+	/*
+	 * Default interrupt handler.
+	 *
+	 * vecnum is where we'll put this code.
+	 * c_routine is the C routine we'll call.
+	 *
+	 * The C routine is passed two arguments:
+	 * - A pointer to the pt_regs state.
+	 * - The interrupt vector number.
+	 *
+	 * The "processing" argument specifies the code for processing
+	 * the interrupt. Defaults to "handle_interrupt".
+	 */
+	.macro  int_hand vecnum, vecname, c_routine, processing=handle_interrupt
+	.org    (\vecnum << 8)
+intvec_\vecname:
+	.ifc    \vecnum, INT_SWINT_1
+	blz     TREG_SYSCALL_NR_NAME, sys_cmpxchg
+	.endif
+
+	/* Temporarily save a register so we have somewhere to work. */
+
+	mtspr   SYSTEM_SAVE_1_1, r0
+	mfspr   r0, EX_CONTEXT_1_1
+
+	/* The cmpxchg code clears sp to force us to reset it here on fault. */
+	{
+	 bz     sp, 2f
+	 andi   r0, r0, SPR_EX_CONTEXT_1_1__PL_MASK  /* mask off ICS */
+	}
+
+	.ifc    \vecnum, INT_DOUBLE_FAULT
+	/*
+	 * For double-faults from user-space, fall through to the normal
+	 * register save and stack setup path.  Otherwise, it's the
+	 * hypervisor giving us one last chance to dump diagnostics, and we
+	 * branch to the kernel_double_fault routine to do so.
+	 */
+	bz      r0, 1f
+	j       _kernel_double_fault
+1:
+	.else
+	/*
+	 * If we're coming from user-space, then set sp to the top of
+	 * the kernel stack.  Otherwise, assume sp is already valid.
+	 */
+	{
+	 bnz    r0, 0f
+	 move   r0, sp
+	}
+	.endif
+
+	.ifc    \c_routine, do_page_fault
+	/*
+	 * The page_fault handler may be downcalled directly by the
+	 * hypervisor even when Linux is running and has ICS set.
+	 *
+	 * In this case the contents of EX_CONTEXT_1_1 reflect the
+	 * previous fault and can't be relied on to choose whether or
+	 * not to reinitialize the stack pointer.  So we add a test
+	 * to see whether SYSTEM_SAVE_1_2 has the high bit set,
+	 * and if so we don't reinitialize sp, since we must be coming
+	 * from Linux.  (In fact the precise case is !(val & ~1),
+	 * but any Linux PC has to have the high bit set.)
+	 *
+	 * Note that the hypervisor *always* sets SYSTEM_SAVE_1_2 for
+	 * any path that turns into a downcall to one of our TLB handlers.
+	 */
+	mfspr   r0, SYSTEM_SAVE_1_2
+	{
+	 blz    r0, 0f    /* high bit in S_S_1_2 is for a PC to use */
+	 move   r0, sp
+	}
+	.endif
+
+2:
+	/*
+	 * SYSTEM_SAVE_1_0 holds the cpu number in the low bits, and
+	 * the current stack top in the higher bits.  So we recover
+	 * our stack top by just masking off the low bits, then
+	 * point sp at the top aligned address on the actual stack page.
+	 */
+	mfspr   r0, SYSTEM_SAVE_1_0
+	mm      r0, r0, zero, LOG2_THREAD_SIZE, 31
+
+0:
+	/*
+	 * Align the stack mod 64 so we can properly predict what
+	 * cache lines we need to write-hint to reduce memory fetch
+	 * latency as we enter the kernel.  The layout of memory is
+	 * as follows, with cache line 0 at the lowest VA, and cache
+	 * line 4 just below the r0 value this "andi" computes.
+	 * Note that we never write to cache line 4, and we skip
+	 * cache line 1 for syscalls.
+	 *
+	 *    cache line 4: ptregs padding (two words)
+	 *    cache line 3: r46...lr, pc, ex1, faultnum, orig_r0, flags, pad
+	 *    cache line 2: r30...r45
+	 *    cache line 1: r14...r29
+	 *    cache line 0: 2 x frame, r0..r13
+	 */
+	andi    r0, r0, -64
+
+	/*
+	 * Push the first four registers on the stack, so that we can set
+	 * them to vector-unique values before we jump to the common code.
+	 *
+	 * Registers are pushed on the stack as a struct pt_regs,
+	 * with the sp initially just above the struct, and when we're
+	 * done, sp points to the base of the struct, minus
+	 * C_ABI_SAVE_AREA_SIZE, so we can directly jal to C code.
+	 *
+	 * This routine saves just the first four registers, plus the
+	 * stack context so we can do proper backtracing right away,
+	 * and defers to handle_interrupt to save the rest.
+	 * The backtracer needs pc, ex1, lr, sp, r52, and faultnum.
+	 */
+	addli   r0, r0, PTREGS_OFFSET_LR - (PTREGS_SIZE + KSTK_PTREGS_GAP)
+	wh64    r0    /* cache line 3 */
+	{
+	 sw     r0, lr
+	 addli  r0, r0, PTREGS_OFFSET_SP - PTREGS_OFFSET_LR
+	}
+	{
+	 sw     r0, sp
+	 addli  sp, r0, PTREGS_OFFSET_REG(52) - PTREGS_OFFSET_SP
+	}
+	{
+	 sw     sp, r52
+	 addli  sp, sp, PTREGS_OFFSET_REG(1) - PTREGS_OFFSET_REG(52)
+	}
+	wh64    sp    /* cache line 0 */
+	{
+	 sw     sp, r1
+	 addli  sp, sp, PTREGS_OFFSET_REG(2) - PTREGS_OFFSET_REG(1)
+	}
+	{
+	 sw     sp, r2
+	 addli  sp, sp, PTREGS_OFFSET_REG(3) - PTREGS_OFFSET_REG(2)
+	}
+	{
+	 sw     sp, r3
+	 addli  sp, sp, PTREGS_OFFSET_PC - PTREGS_OFFSET_REG(3)
+	}
+	mfspr   r0, EX_CONTEXT_1_0
+	.ifc \processing,handle_syscall
+	/*
+	 * Bump the saved PC by one bundle so that when we return, we won't
+	 * execute the same swint instruction again.  We need to do this while
+	 * we're in the critical section.
+	 */
+	addi    r0, r0, 8
+	.endif
+	{
+	 sw     sp, r0
+	 addli  sp, sp, PTREGS_OFFSET_EX1 - PTREGS_OFFSET_PC
+	}
+	mfspr   r0, EX_CONTEXT_1_1
+	{
+	 sw     sp, r0
+	 addi   sp, sp, PTREGS_OFFSET_FAULTNUM - PTREGS_OFFSET_EX1
+	/*
+	 * Use r0 for syscalls so it's a temporary; use r1 for interrupts
+	 * so that it gets passed through unchanged to the handler routine.
+	 * Note that the .if conditional confusingly spans bundles.
+	 */
+	 .ifc \processing,handle_syscall
+	 movei  r0, \vecnum
+	}
+	{
+	 sw     sp, r0
+	 .else
+	 movei  r1, \vecnum
+	}
+	{
+	 sw     sp, r1
+	 .endif
+	 addli  sp, sp, PTREGS_OFFSET_REG(0) - PTREGS_OFFSET_FAULTNUM
+	}
+	mfspr   r0, SYSTEM_SAVE_1_1    /* Original r0 */
+	{
+	 sw     sp, r0
+	 addi   sp, sp, -PTREGS_OFFSET_REG(0) - 4
+	}
+	{
+	 sw     sp, zero        /* write zero into "Next SP" frame pointer */
+	 addi   sp, sp, -4      /* leave SP pointing at bottom of frame */
+	}
+	.ifc \processing,handle_syscall
+	j       handle_syscall
+	.else
+	/*
+	 * Capture per-interrupt SPR context to registers.
+	 * We overload the meaning of r3 on this path such that if its bit 31
+	 * is set, we have to mask all interrupts including NMIs before
+	 * clearing the interrupt critical section bit.
+	 * See discussion below at "finish_interrupt_save".
+	 */
+	.ifc \c_routine, do_page_fault
+	mfspr   r2, SYSTEM_SAVE_1_3   /* address of page fault */
+	mfspr   r3, SYSTEM_SAVE_1_2   /* info about page fault */
+	.else
+	.ifc \vecnum, INT_DOUBLE_FAULT
+	{
+	 mfspr  r2, SYSTEM_SAVE_1_2   /* double fault info from HV */
+	 movei  r3, 0
+	}
+	.else
+	.ifc \c_routine, do_trap
+	{
+	 mfspr  r2, GPV_REASON
+	 movei  r3, 0
+	}
+	.else
+	.ifc \c_routine, op_handle_perf_interrupt
+	{
+	 mfspr  r2, PERF_COUNT_STS
+	 movei  r3, -1   /* not used, but set for consistency */
+	}
+	.else
+#if CHIP_HAS_AUX_PERF_COUNTERS()
+	.ifc \c_routine, op_handle_aux_perf_interrupt
+	{
+	 mfspr  r2, AUX_PERF_COUNT_STS
+	 movei  r3, -1   /* not used, but set for consistency */
+	}
+	.else
+#endif
+	movei   r3, 0
+#if CHIP_HAS_AUX_PERF_COUNTERS()
+	.endif
+#endif
+	.endif
+	.endif
+	.endif
+	.endif
+	/* Put function pointer in r0 */
+	moveli  r0, lo16(\c_routine)
+	{
+	 auli   r0, r0, ha16(\c_routine)
+	 j       \processing
+	}
+	.endif
+	ENDPROC(intvec_\vecname)
+
+#ifdef __COLLECT_LINKER_FEEDBACK__
+	.pushsection .text.intvec_feedback,"ax"
+	.org    (\vecnum << 5)
+	FEEDBACK_ENTER_EXPLICIT(intvec_\vecname, .intrpt1, 1 << 8)
+	jrp     lr
+	.popsection
+#endif
+
+	.endm
+
+
+	/*
+	 * Save the rest of the registers that we didn't save in the actual
+	 * vector itself.  We can't use r0-r10 inclusive here.
+	 */
+	.macro  finish_interrupt_save, function
+
+	/* If it's a syscall, save a proper orig_r0, otherwise just zero. */
+	PTREGS_PTR(r52, PTREGS_OFFSET_ORIG_R0)
+	{
+	 .ifc \function,handle_syscall
+	 sw     r52, r0
+	 .else
+	 sw     r52, zero
+	 .endif
+	 PTREGS_PTR(r52, PTREGS_OFFSET_TP)
+	}
+
+	/*
+	 * For ordinary syscalls, we save neither caller- nor callee-
+	 * save registers, since the syscall invoker doesn't expect the
+	 * caller-saves to be saved, and the called kernel functions will
+	 * take care of saving the callee-saves for us.
+	 *
+	 * For interrupts we save just the caller-save registers.  Saving
+	 * them is required (since the "caller" can't save them).  Again,
+	 * the called kernel functions will restore the callee-save
+	 * registers for us appropriately.
+	 *
+	 * On return, we normally restore nothing special for syscalls,
+	 * and just the caller-save registers for interrupts.
+	 *
+	 * However, there are some important caveats to all this:
+	 *
+	 * - We always save a few callee-save registers to give us
+	 *   some scratchpad registers to carry across function calls.
+	 *
+	 * - fork/vfork/etc require us to save all the callee-save
+	 *   registers, which we do in PTREGS_SYSCALL_ALL_REGS, below.
+	 *
+	 * - We always save r0..r5 and r10 for syscalls, since we need
+	 *   to reload them a bit later for the actual kernel call, and
+	 *   since we might need them for -ERESTARTNOINTR, etc.
+	 *
+	 * - Before invoking a signal handler, we save the unsaved
+	 *   callee-save registers so they are visible to the
+	 *   signal handler or any ptracer.
+	 *
+	 * - If the unsaved callee-save registers are modified, we set
+	 *   a bit in pt_regs so we know to reload them from pt_regs
+	 *   and not just rely on the kernel function unwinding.
+	 *   (Done for ptrace register writes and SA_SIGINFO handler.)
+	 */
+	{
+	 sw     r52, tp
+	 PTREGS_PTR(r52, PTREGS_OFFSET_REG(33))
+	}
+	wh64    r52    /* cache line 2 */
+	push_reg r33, r52
+	push_reg r32, r52
+	push_reg r31, r52
+	.ifc \function,handle_syscall
+	push_reg r30, r52, PTREGS_OFFSET_SYSCALL - PTREGS_OFFSET_REG(30)
+	push_reg TREG_SYSCALL_NR_NAME, r52, \
+	  PTREGS_OFFSET_REG(5) - PTREGS_OFFSET_SYSCALL
+	.else
+
+	push_reg r30, r52, PTREGS_OFFSET_REG(29) - PTREGS_OFFSET_REG(30)
+	wh64    r52    /* cache line 1 */
+	push_reg r29, r52
+	push_reg r28, r52
+	push_reg r27, r52
+	push_reg r26, r52
+	push_reg r25, r52
+	push_reg r24, r52
+	push_reg r23, r52
+	push_reg r22, r52
+	push_reg r21, r52
+	push_reg r20, r52
+	push_reg r19, r52
+	push_reg r18, r52
+	push_reg r17, r52
+	push_reg r16, r52
+	push_reg r15, r52
+	push_reg r14, r52
+	push_reg r13, r52
+	push_reg r12, r52
+	push_reg r11, r52
+	push_reg r10, r52
+	push_reg r9, r52
+	push_reg r8, r52
+	push_reg r7, r52
+	push_reg r6, r52
+
+	.endif
+
+	push_reg r5, r52
+	sw      r52, r4
+
+	/* Load tp with our per-cpu offset. */
+#ifdef CONFIG_SMP
+	{
+	 mfspr  r20, SYSTEM_SAVE_1_0
+	 moveli r21, lo16(__per_cpu_offset)
+	}
+	{
+	 auli   r21, r21, ha16(__per_cpu_offset)
+	 mm     r20, r20, zero, 0, LOG2_THREAD_SIZE-1
+	}
+	s2a     r20, r20, r21
+	lw      tp, r20
+#else
+	move    tp, zero
+#endif
+
+	/*
+	 * If we will be returning to the kernel, we will need to
+	 * reset the interrupt masks to the state they had before.
+	 * Set DISABLE_IRQ in flags iff we came from PL1 with irqs disabled.
+	 * We load flags in r32 here so we can jump to .Lrestore_regs
+	 * directly after do_page_fault_ics() if necessary.
+	 */
+	mfspr   r32, EX_CONTEXT_1_1
+	{
+	 andi   r32, r32, SPR_EX_CONTEXT_1_1__PL_MASK  /* mask off ICS */
+	 PTREGS_PTR(r21, PTREGS_OFFSET_FLAGS)
+	}
+	bzt     r32, 1f       /* zero if from user space */
+	IRQS_DISABLED(r32)    /* zero if irqs enabled */
+#if PT_FLAGS_DISABLE_IRQ != 1
+# error Value of IRQS_DISABLED used to set PT_FLAGS_DISABLE_IRQ; fix
+#endif
+1:
+	.ifnc \function,handle_syscall
+	/* Record the fact that we saved the caller-save registers above. */
+	ori     r32, r32, PT_FLAGS_CALLER_SAVES
+	.endif
+	sw      r21, r32
+
+#ifdef __COLLECT_LINKER_FEEDBACK__
+	/*
+	 * Notify the feedback routines that we were in the
+	 * appropriate fixed interrupt vector area.  Note that we
+	 * still have ICS set at this point, so we can't invoke any
+	 * atomic operations or we will panic.  The feedback
+	 * routines internally preserve r0..r10 and r30 up.
+	 */
+	.ifnc \function,handle_syscall
+	shli    r20, r1, 5
+	.else
+	moveli  r20, INT_SWINT_1 << 5
+	.endif
+	addli   r20, r20, lo16(intvec_feedback)
+	auli    r20, r20, ha16(intvec_feedback)
+	jalr    r20
+
+	/* And now notify the feedback routines that we are here. */
+	FEEDBACK_ENTER(\function)
+#endif
+
+	/*
+	 * we've captured enough state to the stack (including in
+	 * particular our EX_CONTEXT state) that we can now release
+	 * the interrupt critical section and replace it with our
+	 * standard "interrupts disabled" mask value.  This allows
+	 * synchronous interrupts (and profile interrupts) to punch
+	 * through from this point onwards.
+	 *
+	 * If bit 31 of r3 is set during a non-NMI interrupt, we know we
+	 * are on the path where the hypervisor has punched through our
+	 * ICS with a page fault, so we call out to do_page_fault_ics()
+	 * to figure out what to do with it.  If the fault was in
+	 * an atomic op, we unlock the atomic lock, adjust the
+	 * saved register state a little, and return "zero" in r4,
+	 * falling through into the normal page-fault interrupt code.
+	 * If the fault was in a kernel-space atomic operation, then
+	 * do_page_fault_ics() resolves it itself, returns "one" in r4,
+	 * and as a result goes directly to restoring registers and iret,
+	 * without trying to adjust the interrupt masks at all.
+	 * The do_page_fault_ics() API involves passing and returning
+	 * a five-word struct (in registers) to avoid writing the
+	 * save and restore code here.
+	 */
+	.ifc \function,handle_nmi
+	IRQ_DISABLE_ALL(r20)
+	.else
+	.ifnc \function,handle_syscall
+	bgezt   r3, 1f
+	{
+	 PTREGS_PTR(r0, PTREGS_OFFSET_BASE)
+	 jal    do_page_fault_ics
+	}
+	FEEDBACK_REENTER(\function)
+	bzt     r4, 1f
+	j       .Lrestore_regs
+1:
+	.endif
+	IRQ_DISABLE(r20, r21)
+	.endif
+	mtspr   INTERRUPT_CRITICAL_SECTION, zero
+
+#if CHIP_HAS_WH64()
+	/*
+	 * Prepare the first 256 stack bytes to be rapidly accessible
+	 * without having to fetch the background data.  We don't really
+	 * know how far to write-hint, but kernel stacks generally
+	 * aren't that big, and write-hinting here does take some time.
+	 */
+	addi    r52, sp, -64
+	{
+	 wh64   r52
+	 addi   r52, r52, -64
+	}
+	{
+	 wh64   r52
+	 addi   r52, r52, -64
+	}
+	{
+	 wh64   r52
+	 addi   r52, r52, -64
+	}
+	wh64    r52
+#endif
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+	.ifnc \function,handle_nmi
+	/*
+	 * We finally have enough state set up to notify the irq
+	 * tracing code that irqs were disabled on entry to the handler.
+	 * The TRACE_IRQS_OFF call clobbers registers r0-r29.
+	 * For syscalls, we already have the register state saved away
+	 * on the stack, so we don't bother to do any register saves here,
+	 * and later we pop the registers back off the kernel stack.
+	 * For interrupt handlers, save r0-r3 in callee-saved registers.
+	 */
+	.ifnc \function,handle_syscall
+	{ move r30, r0; move r31, r1 }
+	{ move r32, r2; move r33, r3 }
+	.endif
+	TRACE_IRQS_OFF
+	.ifnc \function,handle_syscall
+	{ move r0, r30; move r1, r31 }
+	{ move r2, r32; move r3, r33 }
+	.endif
+	.endif
+#endif
+
+	.endm
+
+	.macro  check_single_stepping, kind, not_single_stepping
+	/*
+	 * Check for single stepping in user-level priv
+	 *   kind can be "normal", "ill", or "syscall"
+	 * At end, if fall-thru
+	 *   r29: thread_info->step_state
+	 *   r28: &pt_regs->pc
+	 *   r27: pt_regs->pc
+	 *   r26: thread_info->step_state->buffer
+	 */
+
+	/* Check for single stepping */
+	GET_THREAD_INFO(r29)
+	{
+	 /* Get pointer to field holding step state */
+	 addi   r29, r29, THREAD_INFO_STEP_STATE_OFFSET
+
+	 /* Get pointer to EX1 in register state */
+	 PTREGS_PTR(r27, PTREGS_OFFSET_EX1)
+	}
+	{
+	 /* Get pointer to field holding PC */
+	 PTREGS_PTR(r28, PTREGS_OFFSET_PC)
+
+	 /* Load the pointer to the step state */
+	 lw     r29, r29
+	}
+	/* Load EX1 */
+	lw      r27, r27
+	{
+	 /* Points to flags */
+	 addi   r23, r29, SINGLESTEP_STATE_FLAGS_OFFSET
+
+	 /* No single stepping if there is no step state structure */
+	 bzt    r29, \not_single_stepping
+	}
+	{
+	 /* mask off ICS and any other high bits */
+	 andi   r27, r27, SPR_EX_CONTEXT_1_1__PL_MASK
+
+	 /* Load pointer to single step instruction buffer */
+	 lw     r26, r29
+	}
+	/* Check priv state */
+	bnz     r27, \not_single_stepping
+
+	/* Get flags */
+	lw      r22, r23
+	{
+	 /* Branch if single-step mode not enabled */
+	 bbnst  r22, \not_single_stepping
+
+	 /* Clear enabled flag */
+	 andi   r22, r22, ~SINGLESTEP_STATE_MASK_IS_ENABLED
+	}
+	.ifc \kind,normal
+	{
+	 /* Load PC */
+	 lw     r27, r28
+
+	 /* Point to the entry containing the original PC */
+	 addi   r24, r29, SINGLESTEP_STATE_ORIG_PC_OFFSET
+	}
+	{
+	 /* Disable single stepping flag */
+	 sw     r23, r22
+	}
+	{
+	 /* Get the original pc */
+	 lw     r24, r24
+
+	 /* See if the PC is at the start of the single step buffer */
+	 seq    r25, r26, r27
+	}
+	/*
+	 * NOTE: it is really expected that the PC be in the single step buffer
+	 *       at this point
+	 */
+	bzt     r25, \not_single_stepping
+
+	/* Restore the original PC */
+	sw      r28, r24
+	.else
+	.ifc \kind,syscall
+	{
+	 /* Load PC */
+	 lw     r27, r28
+
+	 /* Point to the entry containing the next PC */
+	 addi   r24, r29, SINGLESTEP_STATE_NEXT_PC_OFFSET
+	}
+	{
+	 /* Increment the stopped PC by the bundle size */
+	 addi   r26, r26, 8
+
+	 /* Disable single stepping flag */
+	 sw     r23, r22
+	}
+	{
+	 /* Get the next pc */
+	 lw     r24, r24
+
+	 /*
+	  * See if the PC is one bundle past the start of the
+	  * single step buffer
+	  */
+	 seq    r25, r26, r27
+	}
+	{
+	 /*
+	  * NOTE: it is really expected that the PC be in the
+	  * single step buffer at this point
+	  */
+	 bzt    r25, \not_single_stepping
+	}
+	/* Set to the next PC */
+	sw      r28, r24
+	.else
+	{
+	 /* Point to 3rd bundle in buffer */
+	 addi   r25, r26, 16
+
+	 /* Load PC */
+	 lw      r27, r28
+	}
+	{
+	 /* Disable single stepping flag */
+	 sw      r23, r22
+
+	 /* See if the PC is in the single step buffer */
+	 slte_u  r24, r26, r27
+	}
+	{
+	 slte_u r25, r27, r25
+
+	 /*
+	  * NOTE: it is really expected that the PC be in the
+	  * single step buffer at this point
+	  */
+	 bzt    r24, \not_single_stepping
+	}
+	bzt     r25, \not_single_stepping
+	.endif
+	.endif
+	.endm
+
+	/*
+	 * Redispatch a downcall.
+	 */
+	.macro  dc_dispatch vecnum, vecname
+	.org    (\vecnum << 8)
+intvec_\vecname:
+	j       hv_downcall_dispatch
+	ENDPROC(intvec_\vecname)
+	.endm
+
+	/*
+	 * Common code for most interrupts.  The C function we're eventually
+	 * going to is in r0, and the faultnum is in r1; the original
+	 * values for those registers are on the stack.
+	 */
+	.pushsection .text.handle_interrupt,"ax"
+handle_interrupt:
+	finish_interrupt_save handle_interrupt
+
+	/*
+	 * Check for if we are single stepping in user level. If so, then
+	 * we need to restore the PC.
+	 */
+
+	check_single_stepping normal, .Ldispatch_interrupt
+.Ldispatch_interrupt:
+
+	/* Jump to the C routine; it should enable irqs as soon as possible. */
+	{
+	 jalr   r0
+	 PTREGS_PTR(r0, PTREGS_OFFSET_BASE)
+	}
+	FEEDBACK_REENTER(handle_interrupt)
+	{
+	 movei  r30, 0   /* not an NMI */
+	 j      interrupt_return
+	}
+	STD_ENDPROC(handle_interrupt)
+
+/*
+ * This routine takes a boolean in r30 indicating if this is an NMI.
+ * If so, we also expect a boolean in r31 indicating whether to
+ * re-enable the oprofile interrupts.
+ */
+STD_ENTRY(interrupt_return)
+	/* If we're resuming to kernel space, don't check thread flags. */
+	{
+	 bnz    r30, .Lrestore_all  /* NMIs don't special-case user-space */
+	 PTREGS_PTR(r29, PTREGS_OFFSET_EX1)
+	}
+	lw      r29, r29
+	andi    r29, r29, SPR_EX_CONTEXT_1_1__PL_MASK  /* mask off ICS */
+	{
+	 bzt    r29, .Lresume_userspace
+	 PTREGS_PTR(r29, PTREGS_OFFSET_PC)
+	}
+
+	/* If we're resuming to _cpu_idle_nap, bump PC forward by 8. */
+	{
+	 lw     r28, r29
+	 moveli r27, lo16(_cpu_idle_nap)
+	}
+	{
+	 auli   r27, r27, ha16(_cpu_idle_nap)
+	}
+	{
+	 seq    r27, r27, r28
+	}
+	{
+	 bbns   r27, .Lrestore_all
+	 addi   r28, r28, 8
+	}
+	sw      r29, r28
+	j       .Lrestore_all
+
+.Lresume_userspace:
+	FEEDBACK_REENTER(interrupt_return)
+
+	/*
+	 * Disable interrupts so as to make sure we don't
+	 * miss an interrupt that sets any of the thread flags (like
+	 * need_resched or sigpending) between sampling and the iret.
+	 * Routines like schedule() or do_signal() may re-enable
+	 * interrupts before returning.
+	 */
+	IRQ_DISABLE(r20, r21)
+	TRACE_IRQS_OFF  /* Note: clobbers registers r0-r29 */
+
+	/* Get base of stack in r32; note r30/31 are used as arguments here. */
+	GET_THREAD_INFO(r32)
+
+
+	/* Check to see if there is any work to do before returning to user. */
+	{
+	 addi   r29, r32, THREAD_INFO_FLAGS_OFFSET
+	 moveli r28, lo16(_TIF_ALLWORK_MASK)
+	}
+	{
+	 lw     r29, r29
+	 auli   r28, r28, ha16(_TIF_ALLWORK_MASK)
+	}
+	and     r28, r29, r28
+	bnz     r28, .Lwork_pending
+
+	/*
+	 * In the NMI case we
+	 * omit the call to single_process_check_nohz, which normally checks
+	 * to see if we should start or stop the scheduler tick, because
+	 * we can't call arbitrary Linux code from an NMI context.
+	 * We always call the homecache TLB deferral code to re-trigger
+	 * the deferral mechanism.
+	 *
+	 * The other chunk of responsibility this code has is to reset the
+	 * interrupt masks appropriately to reset irqs and NMIs.  We have
+	 * to call TRACE_IRQS_OFF and TRACE_IRQS_ON to support all the
+	 * lockdep-type stuff, but we can't set ICS until afterwards, since
+	 * ICS can only be used in very tight chunks of code to avoid
+	 * tripping over various assertions that it is off.
+	 *
+	 * (There is what looks like a window of vulnerability here since
+	 * we might take a profile interrupt between the two SPR writes
+	 * that set the mask, but since we write the low SPR word first,
+	 * and our interrupt entry code checks the low SPR word, any
+	 * profile interrupt will actually disable interrupts in both SPRs
+	 * before returning, which is OK.)
+	 */
+.Lrestore_all:
+	PTREGS_PTR(r0, PTREGS_OFFSET_EX1)
+	{
+	 lw     r0, r0
+	 PTREGS_PTR(r32, PTREGS_OFFSET_FLAGS)
+	}
+	{
+	 andi   r0, r0, SPR_EX_CONTEXT_1_1__PL_MASK
+	 lw     r32, r32
+	}
+	bnz    r0, 1f
+	j       2f
+#if PT_FLAGS_DISABLE_IRQ != 1
+# error Assuming PT_FLAGS_DISABLE_IRQ == 1 so we can use bbnst below
+#endif
+1:	bbnst   r32, 2f
+	IRQ_DISABLE(r20,r21)
+	TRACE_IRQS_OFF
+	movei   r0, 1
+	mtspr   INTERRUPT_CRITICAL_SECTION, r0
+	bzt     r30, .Lrestore_regs
+	j       3f
+2:	TRACE_IRQS_ON
+	movei   r0, 1
+	mtspr   INTERRUPT_CRITICAL_SECTION, r0
+	IRQ_ENABLE(r20, r21)
+	bzt     r30, .Lrestore_regs
+3:
+
+
+	/*
+	 * We now commit to returning from this interrupt, since we will be
+	 * doing things like setting EX_CONTEXT SPRs and unwinding the stack
+	 * frame.  No calls should be made to any other code after this point.
+	 * This code should only be entered with ICS set.
+	 * r32 must still be set to ptregs.flags.
+	 * We launch loads to each cache line separately first, so we can
+	 * get some parallelism out of the memory subsystem.
+	 * We start zeroing caller-saved registers throughout, since
+	 * that will save some cycles if this turns out to be a syscall.
+	 */
+.Lrestore_regs:
+	FEEDBACK_REENTER(interrupt_return)   /* called from elsewhere */
+
+	/*
+	 * Rotate so we have one high bit and one low bit to test.
+	 * - low bit says whether to restore all the callee-saved registers,
+	 *   or just r30-r33, and r52 up.
+	 * - high bit (i.e. sign bit) says whether to restore all the
+	 *   caller-saved registers, or just r0.
+	 */
+#if PT_FLAGS_CALLER_SAVES != 2 || PT_FLAGS_RESTORE_REGS != 4
+# error Rotate trick does not work :-)
+#endif
+	{
+	 rli    r20, r32, 30
+	 PTREGS_PTR(sp, PTREGS_OFFSET_REG(0))
+	}
+
+	/*
+	 * Load cache lines 0, 2, and 3 in that order, then use
+	 * the last loaded value, which makes it likely that the other
+	 * cache lines have also loaded, at which point we should be
+	 * able to safely read all the remaining words on those cache
+	 * lines without waiting for the memory subsystem.
+	 */
+	pop_reg_zero r0, r1, sp, PTREGS_OFFSET_REG(30) - PTREGS_OFFSET_REG(0)
+	pop_reg_zero r30, r2, sp, PTREGS_OFFSET_PC - PTREGS_OFFSET_REG(30)
+	pop_reg_zero r21, r3, sp, PTREGS_OFFSET_EX1 - PTREGS_OFFSET_PC
+	pop_reg_zero lr, r4, sp, PTREGS_OFFSET_REG(52) - PTREGS_OFFSET_EX1
+	{
+	 mtspr  EX_CONTEXT_1_0, r21
+	 move   r5, zero
+	}
+	{
+	 mtspr  EX_CONTEXT_1_1, lr
+	 andi   lr, lr, SPR_EX_CONTEXT_1_1__PL_MASK  /* mask off ICS */
+	}
+
+	/* Restore callee-saveds that we actually use. */
+	pop_reg_zero r52, r6, sp, PTREGS_OFFSET_REG(31) - PTREGS_OFFSET_REG(52)
+	pop_reg_zero r31, r7
+	pop_reg_zero r32, r8
+	pop_reg_zero r33, r9, sp, PTREGS_OFFSET_REG(29) - PTREGS_OFFSET_REG(33)
+
+	/*
+	 * If we modified other callee-saveds, restore them now.
+	 * This is rare, but could be via ptrace or signal handler.
+	 */
+	{
+	 move   r10, zero
+	 bbs    r20, .Lrestore_callees
+	}
+.Lcontinue_restore_regs:
+
+	/* Check if we're returning from a syscall. */
+	{
+	 move   r11, zero
+	 blzt   r20, 1f  /* no, so go restore callee-save registers */
+	}
+
+	/*
+	 * Check if we're returning to userspace.
+	 * Note that if we're not, we don't worry about zeroing everything.
+	 */
+	{
+	 addli  sp, sp, PTREGS_OFFSET_LR - PTREGS_OFFSET_REG(29)
+	 bnz    lr, .Lkernel_return
+	}
+
+	/*
+	 * On return from syscall, we've restored r0 from pt_regs, but we
+	 * clear the remainder of the caller-saved registers.  We could
+	 * restore the syscall arguments, but there's not much point,
+	 * and it ensures user programs aren't trying to use the
+	 * caller-saves if we clear them, as well as avoiding leaking
+	 * kernel pointers into userspace.
+	 */
+	pop_reg_zero lr, r12, sp, PTREGS_OFFSET_TP - PTREGS_OFFSET_LR
+	pop_reg_zero tp, r13, sp, PTREGS_OFFSET_SP - PTREGS_OFFSET_TP
+	{
+	 lw     sp, sp
+	 move   r14, zero
+	 move   r15, zero
+	}
+	{ move r16, zero; move r17, zero }
+	{ move r18, zero; move r19, zero }
+	{ move r20, zero; move r21, zero }
+	{ move r22, zero; move r23, zero }
+	{ move r24, zero; move r25, zero }
+	{ move r26, zero; move r27, zero }
+	{ move r28, zero; move r29, zero }
+	iret
+
+	/*
+	 * Not a syscall, so restore caller-saved registers.
+	 * First kick off a load for cache line 1, which we're touching
+	 * for the first time here.
+	 */
+	.align 64
+1:	pop_reg r29, sp, PTREGS_OFFSET_REG(1) - PTREGS_OFFSET_REG(29)
+	pop_reg r1
+	pop_reg r2
+	pop_reg r3
+	pop_reg r4
+	pop_reg r5
+	pop_reg r6
+	pop_reg r7
+	pop_reg r8
+	pop_reg r9
+	pop_reg r10
+	pop_reg r11
+	pop_reg r12
+	pop_reg r13
+	pop_reg r14
+	pop_reg r15
+	pop_reg r16
+	pop_reg r17
+	pop_reg r18
+	pop_reg r19
+	pop_reg r20
+	pop_reg r21
+	pop_reg r22
+	pop_reg r23
+	pop_reg r24
+	pop_reg r25
+	pop_reg r26
+	pop_reg r27
+	pop_reg r28, sp, PTREGS_OFFSET_LR - PTREGS_OFFSET_REG(28)
+	/* r29 already restored above */
+	bnz     lr, .Lkernel_return
+	pop_reg lr, sp, PTREGS_OFFSET_TP - PTREGS_OFFSET_LR
+	pop_reg tp, sp, PTREGS_OFFSET_SP - PTREGS_OFFSET_TP
+	lw      sp, sp
+	iret
+
+	/*
+	 * We can't restore tp when in kernel mode, since a thread might
+	 * have migrated from another cpu and brought a stale tp value.
+	 */
+.Lkernel_return:
+	pop_reg lr, sp, PTREGS_OFFSET_SP - PTREGS_OFFSET_LR
+	lw      sp, sp
+	iret
+
+	/* Restore callee-saved registers from r34 to r51. */
+.Lrestore_callees:
+	addli  sp, sp, PTREGS_OFFSET_REG(34) - PTREGS_OFFSET_REG(29)
+	pop_reg r34
+	pop_reg r35
+	pop_reg r36
+	pop_reg r37
+	pop_reg r38
+	pop_reg r39
+	pop_reg r40
+	pop_reg r41
+	pop_reg r42
+	pop_reg r43
+	pop_reg r44
+	pop_reg r45
+	pop_reg r46
+	pop_reg r47
+	pop_reg r48
+	pop_reg r49
+	pop_reg r50
+	pop_reg r51, sp, PTREGS_OFFSET_REG(29) - PTREGS_OFFSET_REG(51)
+	j .Lcontinue_restore_regs
+
+.Lwork_pending:
+	/* Mask the reschedule flag */
+	andi    r28, r29, _TIF_NEED_RESCHED
+
+	{
+	 /*
+	  * If the NEED_RESCHED flag is called, we call schedule(), which
+	  * may drop this context right here and go do something else.
+	  * On return, jump back to .Lresume_userspace and recheck.
+	  */
+	 bz     r28, .Lasync_tlb
+
+	 /* Mask the async-tlb flag */
+	 andi   r28, r29, _TIF_ASYNC_TLB
+	}
+
+	jal     schedule
+	FEEDBACK_REENTER(interrupt_return)
+
+	/* Reload the flags and check again */
+	j       .Lresume_userspace
+
+.Lasync_tlb:
+	{
+	 bz     r28, .Lneed_sigpending
+
+	 /* Mask the sigpending flag */
+	 andi   r28, r29, _TIF_SIGPENDING
+	}
+
+	PTREGS_PTR(r0, PTREGS_OFFSET_BASE)
+	jal     do_async_page_fault
+	FEEDBACK_REENTER(interrupt_return)
+
+	/*
+	 * Go restart the "resume userspace" process.  We may have
+	 * fired a signal, and we need to disable interrupts again.
+	 */
+	j       .Lresume_userspace
+
+.Lneed_sigpending:
+	/*
+	 * At this point we are either doing signal handling or single-step,
+	 * so either way make sure we have all the registers saved.
+	 */
+	push_extra_callee_saves r0
+
+	{
+	 /* If no signal pending, skip to singlestep check */
+	 bz     r28, .Lneed_singlestep
+
+	 /* Mask the singlestep flag */
+	 andi   r28, r29, _TIF_SINGLESTEP
+	}
+
+	jal     do_signal
+	FEEDBACK_REENTER(interrupt_return)
+
+	/* Reload the flags and check again */
+	j       .Lresume_userspace
+
+.Lneed_singlestep:
+	{
+	 /* Get a pointer to the EX1 field */
+	 PTREGS_PTR(r29, PTREGS_OFFSET_EX1)
+
+	 /* If we get here, our bit must be set. */
+	 bz     r28, .Lwork_confusion
+	}
+	/* If we are in priv mode, don't single step */
+	lw      r28, r29
+	andi    r28, r28, SPR_EX_CONTEXT_1_1__PL_MASK  /* mask off ICS */
+	bnz     r28, .Lrestore_all
+
+	/* Allow interrupts within the single step code */
+	TRACE_IRQS_ON  /* Note: clobbers registers r0-r29 */
+	IRQ_ENABLE(r20, r21)
+
+	/* try to single-step the current instruction */
+	PTREGS_PTR(r0, PTREGS_OFFSET_BASE)
+	jal     single_step_once
+	FEEDBACK_REENTER(interrupt_return)
+
+	/* Re-disable interrupts.  TRACE_IRQS_OFF in .Lrestore_all. */
+	IRQ_DISABLE(r20,r21)
+
+	j       .Lrestore_all
+
+.Lwork_confusion:
+	move    r0, r28
+	panic   "thread_info allwork flags unhandled on userspace resume: %#x"
+
+	STD_ENDPROC(interrupt_return)
+
+	/*
+	 * This interrupt variant clears the INT_INTCTRL_1 interrupt mask bit
+	 * before returning, so we can properly get more downcalls.
+	 */
+	.pushsection .text.handle_interrupt_downcall,"ax"
+handle_interrupt_downcall:
+	finish_interrupt_save handle_interrupt_downcall
+	check_single_stepping normal, .Ldispatch_downcall
+.Ldispatch_downcall:
+
+	/* Clear INTCTRL_1 from the set of interrupts we ever enable. */
+	GET_INTERRUPTS_ENABLED_MASK_PTR(r30)
+	{
+	 addi   r30, r30, 4
+	 movei  r31, INT_MASK(INT_INTCTRL_1)
+	}
+	{
+	 lw     r20, r30
+	 nor    r21, r31, zero
+	}
+	and     r20, r20, r21
+	sw      r30, r20
+
+	{
+	 jalr   r0
+	 PTREGS_PTR(r0, PTREGS_OFFSET_BASE)
+	}
+	FEEDBACK_REENTER(handle_interrupt_downcall)
+
+	/* Allow INTCTRL_1 to be enabled next time we enable interrupts. */
+	lw      r20, r30
+	or      r20, r20, r31
+	sw      r30, r20
+
+	{
+	 movei  r30, 0   /* not an NMI */
+	 j      interrupt_return
+	}
+	STD_ENDPROC(handle_interrupt_downcall)
+
+	/*
+	 * Some interrupts don't check for single stepping
+	 */
+	.pushsection .text.handle_interrupt_no_single_step,"ax"
+handle_interrupt_no_single_step:
+	finish_interrupt_save handle_interrupt_no_single_step
+	{
+	 jalr   r0
+	 PTREGS_PTR(r0, PTREGS_OFFSET_BASE)
+	}
+	FEEDBACK_REENTER(handle_interrupt_no_single_step)
+	{
+	 movei  r30, 0   /* not an NMI */
+	 j      interrupt_return
+	}
+	STD_ENDPROC(handle_interrupt_no_single_step)
+
+	/*
+	 * "NMI" interrupts mask ALL interrupts before calling the
+	 * handler, and don't check thread flags, etc., on the way
+	 * back out.  In general, the only things we do here for NMIs
+	 * are the register save/restore, fixing the PC if we were
+	 * doing single step, and the dataplane kernel-TLB management.
+	 * We don't (for example) deal with start/stop of the sched tick.
+	 */
+	.pushsection .text.handle_nmi,"ax"
+handle_nmi:
+	finish_interrupt_save handle_nmi
+	check_single_stepping normal, .Ldispatch_nmi
+.Ldispatch_nmi:
+	{
+	 jalr   r0
+	 PTREGS_PTR(r0, PTREGS_OFFSET_BASE)
+	}
+	FEEDBACK_REENTER(handle_nmi)
+	j       interrupt_return
+	STD_ENDPROC(handle_nmi)
+
+	/*
+	 * Parallel code for syscalls to handle_interrupt.
+	 */
+	.pushsection .text.handle_syscall,"ax"
+handle_syscall:
+	finish_interrupt_save handle_syscall
+
+	/*
+	 * Check for if we are single stepping in user level. If so, then
+	 * we need to restore the PC.
+	 */
+	check_single_stepping syscall, .Ldispatch_syscall
+.Ldispatch_syscall:
+
+	/* Enable irqs. */
+	TRACE_IRQS_ON
+	IRQ_ENABLE(r20, r21)
+
+	/* Bump the counter for syscalls made on this tile. */
+	moveli  r20, lo16(irq_stat + IRQ_CPUSTAT_SYSCALL_COUNT_OFFSET)
+	auli    r20, r20, ha16(irq_stat + IRQ_CPUSTAT_SYSCALL_COUNT_OFFSET)
+	add     r20, r20, tp
+	lw      r21, r20
+	addi    r21, r21, 1
+	sw      r20, r21
+
+	/* Trace syscalls, if requested. */
+	GET_THREAD_INFO(r31)
+	addi	r31, r31, THREAD_INFO_FLAGS_OFFSET
+	lw	r30, r31
+	andi    r30, r30, _TIF_SYSCALL_TRACE
+	bzt	r30, .Lrestore_syscall_regs
+	jal	do_syscall_trace
+	FEEDBACK_REENTER(handle_syscall)
+
+	/*
+	 * We always reload our registers from the stack at this
+	 * point.  They might be valid, if we didn't build with
+	 * TRACE_IRQFLAGS, and this isn't a dataplane tile, and we're not
+	 * doing syscall tracing, but there are enough cases now that it
+	 * seems simplest just to do the reload unconditionally.
+	 */
+.Lrestore_syscall_regs:
+	PTREGS_PTR(r11, PTREGS_OFFSET_REG(0))
+	pop_reg r0, r11
+	pop_reg r1, r11
+	pop_reg r2, r11
+	pop_reg r3, r11
+	pop_reg r4, r11
+	pop_reg r5, r11, PTREGS_OFFSET_SYSCALL - PTREGS_OFFSET_REG(5)
+	pop_reg TREG_SYSCALL_NR_NAME, r11
+
+	/* Ensure that the syscall number is within the legal range. */
+	moveli  r21, __NR_syscalls
+	{
+	 slt_u  r21, TREG_SYSCALL_NR_NAME, r21
+	 moveli r20, lo16(sys_call_table)
+	}
+	{
+	 bbns   r21, .Linvalid_syscall
+	 auli   r20, r20, ha16(sys_call_table)
+	}
+	s2a     r20, TREG_SYSCALL_NR_NAME, r20
+	lw      r20, r20
+
+	/* Jump to syscall handler. */
+	jalr    r20; .Lhandle_syscall_link:
+	FEEDBACK_REENTER(handle_syscall)
+
+	/*
+	 * Write our r0 onto the stack so it gets restored instead
+	 * of whatever the user had there before.
+	 */
+	PTREGS_PTR(r29, PTREGS_OFFSET_REG(0))
+	sw      r29, r0
+
+	/* Do syscall trace again, if requested. */
+	lw	r30, r31
+	andi    r30, r30, _TIF_SYSCALL_TRACE
+	bzt     r30, 1f
+	jal	do_syscall_trace
+	FEEDBACK_REENTER(handle_syscall)
+1:	j       .Lresume_userspace   /* jump into middle of interrupt_return */
+
+.Linvalid_syscall:
+	/* Report an invalid syscall back to the user program */
+	{
+	 PTREGS_PTR(r29, PTREGS_OFFSET_REG(0))
+	 movei  r28, -ENOSYS
+	}
+	sw      r29, r28
+	j       .Lresume_userspace   /* jump into middle of interrupt_return */
+	STD_ENDPROC(handle_syscall)
+
+	/* Return the address for oprofile to suppress in backtraces. */
+STD_ENTRY_SECTION(handle_syscall_link_address, .text.handle_syscall)
+	lnk     r0
+	{
+	 addli  r0, r0, .Lhandle_syscall_link - .
+	 jrp    lr
+	}
+	STD_ENDPROC(handle_syscall_link_address)
+
+STD_ENTRY(ret_from_fork)
+	jal     sim_notify_fork
+	jal     schedule_tail
+	FEEDBACK_REENTER(ret_from_fork)
+	j       .Lresume_userspace   /* jump into middle of interrupt_return */
+	STD_ENDPROC(ret_from_fork)
+
+	/*
+	 * Code for ill interrupt.
+	 */
+	.pushsection .text.handle_ill,"ax"
+handle_ill:
+	finish_interrupt_save handle_ill
+
+	/*
+	 * Check for if we are single stepping in user level. If so, then
+	 * we need to restore the PC.
+	 */
+	check_single_stepping ill, .Ldispatch_normal_ill
+
+	{
+	 /* See if the PC is the 1st bundle in the buffer */
+	 seq    r25, r27, r26
+
+	 /* Point to the 2nd bundle in the buffer */
+	 addi   r26, r26, 8
+	}
+	{
+	 /* Point to the original pc */
+	 addi   r24, r29, SINGLESTEP_STATE_ORIG_PC_OFFSET
+
+	 /* Branch if the PC is the 1st bundle in the buffer */
+	 bnz    r25, 3f
+	}
+	{
+	 /* See if the PC is the 2nd bundle of the buffer */
+	 seq    r25, r27, r26
+
+	 /* Set PC to next instruction */
+	 addi   r24, r29, SINGLESTEP_STATE_NEXT_PC_OFFSET
+	}
+	{
+	 /* Point to flags */
+	 addi   r25, r29, SINGLESTEP_STATE_FLAGS_OFFSET
+
+	 /* Branch if PC is in the second bundle */
+	 bz     r25, 2f
+	}
+	/* Load flags */
+	lw      r25, r25
+	{
+	 /*
+	  * Get the offset for the register to restore
+	  * Note: the lower bound is 2, so we have implicit scaling by 4.
+	  *  No multiplication of the register number by the size of a register
+	  *  is needed.
+	  */
+	 mm     r27, r25, zero, SINGLESTEP_STATE_TARGET_LB, \
+		SINGLESTEP_STATE_TARGET_UB
+
+	 /* Mask Rewrite_LR */
+	 andi   r25, r25, SINGLESTEP_STATE_MASK_UPDATE
+	}
+	{
+	 addi   r29, r29, SINGLESTEP_STATE_UPDATE_VALUE_OFFSET
+
+	 /* Don't rewrite temp register */
+	 bz     r25, 3f
+	}
+	{
+	 /* Get the temp value */
+	 lw     r29, r29
+
+	 /* Point to where the register is stored */
+	 add    r27, r27, sp
+	}
+
+	/* Add in the C ABI save area size to the register offset */
+	addi    r27, r27, C_ABI_SAVE_AREA_SIZE
+
+	/* Restore the user's register with the temp value */
+	sw      r27, r29
+	j       3f
+
+2:
+	/* Must be in the third bundle */
+	addi    r24, r29, SINGLESTEP_STATE_BRANCH_NEXT_PC_OFFSET
+
+3:
+	/* set PC and continue */
+	lw      r26, r24
+	sw      r28, r26
+
+	/* Clear TIF_SINGLESTEP */
+	GET_THREAD_INFO(r0)
+
+	addi    r1, r0, THREAD_INFO_FLAGS_OFFSET
+	{
+	 lw     r2, r1
+	 addi   r0, r0, THREAD_INFO_TASK_OFFSET  /* currently a no-op */
+	}
+	andi    r2, r2, ~_TIF_SINGLESTEP
+	sw      r1, r2
+
+	/* Issue a sigtrap */
+	{
+	 lw     r0, r0          /* indirect thru thread_info to get task_info*/
+	 addi   r1, sp, C_ABI_SAVE_AREA_SIZE  /* put ptregs pointer into r1 */
+	 move   r2, zero        /* load error code into r2 */
+	}
+
+	jal     send_sigtrap    /* issue a SIGTRAP */
+	FEEDBACK_REENTER(handle_ill)
+	j       .Lresume_userspace   /* jump into middle of interrupt_return */
+
+.Ldispatch_normal_ill:
+	{
+	 jalr   r0
+	 PTREGS_PTR(r0, PTREGS_OFFSET_BASE)
+	}
+	FEEDBACK_REENTER(handle_ill)
+	{
+	 movei  r30, 0   /* not an NMI */
+	 j      interrupt_return
+	}
+	STD_ENDPROC(handle_ill)
+
+	.pushsection .rodata, "a"
+	.align  8
+bpt_code:
+	bpt
+	ENDPROC(bpt_code)
+	.popsection
+
+/* Various stub interrupt handlers and syscall handlers */
+
+STD_ENTRY_LOCAL(_kernel_double_fault)
+	mfspr   r1, EX_CONTEXT_1_0
+	move    r2, lr
+	move    r3, sp
+	move    r4, r52
+	addi    sp, sp, -C_ABI_SAVE_AREA_SIZE
+	j       kernel_double_fault
+	STD_ENDPROC(_kernel_double_fault)
+
+STD_ENTRY_LOCAL(bad_intr)
+	mfspr   r2, EX_CONTEXT_1_0
+	panic   "Unhandled interrupt %#x: PC %#lx"
+	STD_ENDPROC(bad_intr)
+
+/* Put address of pt_regs in reg and jump. */
+#define PTREGS_SYSCALL(x, reg)                          \
+	STD_ENTRY(x);                                   \
+	{                                               \
+	 PTREGS_PTR(reg, PTREGS_OFFSET_BASE);           \
+	 j      _##x                                    \
+	};                                              \
+	STD_ENDPROC(x)
+
+PTREGS_SYSCALL(sys_execve, r3)
+PTREGS_SYSCALL(sys_sigaltstack, r2)
+PTREGS_SYSCALL(sys_rt_sigreturn, r0)
+
+/* Save additional callee-saves to pt_regs, put address in reg and jump. */
+#define PTREGS_SYSCALL_ALL_REGS(x, reg)                 \
+	STD_ENTRY(x);                                   \
+	push_extra_callee_saves reg;                    \
+	j       _##x;                                   \
+	STD_ENDPROC(x)
+
+PTREGS_SYSCALL_ALL_REGS(sys_fork, r0)
+PTREGS_SYSCALL_ALL_REGS(sys_vfork, r0)
+PTREGS_SYSCALL_ALL_REGS(sys_clone, r4)
+PTREGS_SYSCALL_ALL_REGS(sys_cmpxchg_badaddr, r1)
+
+/*
+ * This entrypoint is taken for the cmpxchg and atomic_update fast
+ * swints.  We may wish to generalize it to other fast swints at some
+ * point, but for now there are just two very similar ones, which
+ * makes it faster.
+ *
+ * The fast swint code is designed to have a small footprint.  It does
+ * not save or restore any GPRs, counting on the caller-save registers
+ * to be available to it on entry.  It does not modify any callee-save
+ * registers (including "lr").  It does not check what PL it is being
+ * called at, so you'd better not call it other than at PL0.
+ *
+ * It does not use the stack, but since it might be re-interrupted by
+ * a page fault which would assume the stack was valid, it does
+ * save/restore the stack pointer and zero it out to make sure it gets reset.
+ * Since we always keep interrupts disabled, the hypervisor won't
+ * clobber our EX_CONTEXT_1_x registers, so we don't save/restore them
+ * (other than to advance the PC on return).
+ *
+ * We have to manually validate the user vs kernel address range
+ * (since at PL1 we can read/write both), and for performance reasons
+ * we don't allow cmpxchg on the fc000000 memory region, since we only
+ * validate that the user address is below PAGE_OFFSET.
+ *
+ * We place it in the __HEAD section to ensure it is relatively
+ * near to the intvec_SWINT_1 code (reachable by a conditional branch).
+ *
+ * Must match register usage in do_page_fault().
+ */
+	__HEAD
+	.align 64
+	/* Align much later jump on the start of a cache line. */
+#if !ATOMIC_LOCKS_FOUND_VIA_TABLE()
+	nop; nop
+#endif
+ENTRY(sys_cmpxchg)
+
+	/*
+	 * Save "sp" and set it zero for any possible page fault.
+	 *
+	 * HACK: We want to both zero sp and check r0's alignment,
+	 * so we do both at once. If "sp" becomes nonzero we
+	 * know r0 is unaligned and branch to the error handler that
+	 * restores sp, so this is OK.
+	 *
+	 * ICS is disabled right now so having a garbage but nonzero
+	 * sp is OK, since we won't execute any faulting instructions
+	 * when it is nonzero.
+	 */
+	{
+	 move   r27, sp
+	 andi	sp, r0, 3
+	}
+
+	/*
+	 * Get the lock address in ATOMIC_LOCK_REG, and also validate that the
+	 * address is less than PAGE_OFFSET, since that won't trap at PL1.
+	 * We only use bits less than PAGE_SHIFT to avoid having to worry
+	 * about aliasing among multiple mappings of the same physical page,
+	 * and we ignore the low 3 bits so we have one lock that covers
+	 * both a cmpxchg64() and a cmpxchg() on either its low or high word.
+	 * NOTE: this code must match __atomic_hashed_lock() in lib/atomic.c.
+	 */
+
+#if ATOMIC_LOCKS_FOUND_VIA_TABLE()
+	{
+	 /* Check for unaligned input. */
+	 bnz    sp, .Lcmpxchg_badaddr
+	 mm     r25, r0, zero, 3, PAGE_SHIFT-1
+	}
+	{
+	 crc32_32 r25, zero, r25
+	 moveli r21, lo16(atomic_lock_ptr)
+	}
+	{
+	 auli   r21, r21, ha16(atomic_lock_ptr)
+	 auli   r23, zero, hi16(PAGE_OFFSET)  /* hugepage-aligned */
+	}
+	{
+	 shri	r20, r25, 32 - ATOMIC_HASH_L1_SHIFT
+	 slt_u  r23, r0, r23
+
+	 /*
+	  * Ensure that the TLB is loaded before we take out the lock.
+	  * On TILEPro, this will start fetching the value all the way
+	  * into our L1 as well (and if it gets modified before we
+	  * grab the lock, it will be invalidated from our cache
+	  * before we reload it).  On tile64, we'll start fetching it
+	  * into our L1 if we're the home, and if we're not, we'll
+	  * still at least start fetching it into the home's L2.
+	  */
+	 lw	r26, r0
+	}
+	{
+	 s2a    r21, r20, r21
+	 bbns   r23, .Lcmpxchg_badaddr
+	}
+	{
+	 lw     r21, r21
+	 seqi	r23, TREG_SYSCALL_NR_NAME, __NR_FAST_cmpxchg64
+	 andi	r25, r25, ATOMIC_HASH_L2_SIZE - 1
+	}
+	{
+	 /* Branch away at this point if we're doing a 64-bit cmpxchg. */
+	 bbs    r23, .Lcmpxchg64
+	 andi   r23, r0, 7       /* Precompute alignment for cmpxchg64. */
+	}
+
+	{
+	 /*
+	  * We very carefully align the code that actually runs with
+	  * the lock held (nine bundles) so that we know it is all in
+	  * the icache when we start.  This instruction (the jump) is
+	  * at the start of the first cache line, address zero mod 64;
+	  * we jump to somewhere in the second cache line to issue the
+	  * tns, then jump back to finish up.
+	  */
+	 s2a	ATOMIC_LOCK_REG_NAME, r25, r21
+	 j      .Lcmpxchg32_tns
+	}
+
+#else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
+	{
+	 /* Check for unaligned input. */
+	 bnz    sp, .Lcmpxchg_badaddr
+	 auli   r23, zero, hi16(PAGE_OFFSET)  /* hugepage-aligned */
+	}
+	{
+	 /*
+	  * Slide bits into position for 'mm'. We want to ignore
+	  * the low 3 bits of r0, and consider only the next
+	  * ATOMIC_HASH_SHIFT bits.
+	  * Because of C pointer arithmetic, we want to compute this:
+	  *
+	  * ((char*)atomic_locks +
+	  *  (((r0 >> 3) & (1 << (ATOMIC_HASH_SIZE - 1))) << 2))
+	  *
+	  * Instead of two shifts we just ">> 1", and use 'mm'
+	  * to ignore the low and high bits we don't want.
+	  */
+	 shri	r25, r0, 1
+
+	 slt_u  r23, r0, r23
+
+	 /*
+	  * Ensure that the TLB is loaded before we take out the lock.
+	  * On tilepro, this will start fetching the value all the way
+	  * into our L1 as well (and if it gets modified before we
+	  * grab the lock, it will be invalidated from our cache
+	  * before we reload it).  On tile64, we'll start fetching it
+	  * into our L1 if we're the home, and if we're not, we'll
+	  * still at least start fetching it into the home's L2.
+	  */
+	 lw	r26, r0
+	}
+	{
+	 /* atomic_locks is page aligned so this suffices to get its addr. */
+	 auli	r21, zero, hi16(atomic_locks)
+
+	 bbns   r23, .Lcmpxchg_badaddr
+	}
+	{
+	 /*
+	  * Insert the hash bits into the page-aligned pointer.
+	  * ATOMIC_HASH_SHIFT is so big that we don't actually hash
+	  * the unmasked address bits, as that may cause unnecessary
+	  * collisions.
+	  */
+	 mm	ATOMIC_LOCK_REG_NAME, r25, r21, 2, (ATOMIC_HASH_SHIFT + 2) - 1
+
+	 seqi	r23, TREG_SYSCALL_NR_NAME, __NR_FAST_cmpxchg64
+	}
+	{
+	 /* Branch away at this point if we're doing a 64-bit cmpxchg. */
+	 bbs    r23, .Lcmpxchg64
+	 andi   r23, r0, 7       /* Precompute alignment for cmpxchg64. */
+	}
+	{
+	 /*
+	  * We very carefully align the code that actually runs with
+	  * the lock held (nine bundles) so that we know it is all in
+	  * the icache when we start.  This instruction (the jump) is
+	  * at the start of the first cache line, address zero mod 64;
+	  * we jump to somewhere in the second cache line to issue the
+	  * tns, then jump back to finish up.
+	  */
+	 j      .Lcmpxchg32_tns
+	}
+
+#endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
+
+	ENTRY(__sys_cmpxchg_grab_lock)
+
+	/*
+	 * Perform the actual cmpxchg or atomic_update.
+	 * Note that __futex_mark_unlocked() in uClibc relies on
+	 * atomic_update() to always perform an "mf", so don't make
+	 * it optional or conditional without modifying that code.
+	 */
+.Ldo_cmpxchg32:
+	{
+	 lw     r21, r0
+	 seqi	r23, TREG_SYSCALL_NR_NAME, __NR_FAST_atomic_update
+	 move	r24, r2
+	}
+	{
+	 seq    r22, r21, r1     /* See if cmpxchg matches. */
+	 and	r25, r21, r1     /* If atomic_update, compute (*mem & mask) */
+	}
+	{
+	 or	r22, r22, r23    /* Skip compare branch for atomic_update. */
+	 add	r25, r25, r2     /* Compute (*mem & mask) + addend. */
+	}
+	{
+	 mvnz	r24, r23, r25    /* Use atomic_update value if appropriate. */
+	 bbns   r22, .Lcmpxchg32_mismatch
+	}
+	sw      r0, r24
+
+	/* Do slow mtspr here so the following "mf" waits less. */
+	{
+	 move   sp, r27
+	 mtspr  EX_CONTEXT_1_0, r28
+	}
+	mf
+
+	/* The following instruction is the start of the second cache line. */
+	{
+	 move   r0, r21
+	 sw     ATOMIC_LOCK_REG_NAME, zero
+	}
+	iret
+
+	/* Duplicated code here in the case where we don't overlap "mf" */
+.Lcmpxchg32_mismatch:
+	{
+	 move   r0, r21
+	 sw     ATOMIC_LOCK_REG_NAME, zero
+	}
+	{
+	 move   sp, r27
+	 mtspr  EX_CONTEXT_1_0, r28
+	}
+	iret
+
+	/*
+	 * The locking code is the same for 32-bit cmpxchg/atomic_update,
+	 * and for 64-bit cmpxchg.  We provide it as a macro and put
+	 * it into both versions.  We can't share the code literally
+	 * since it depends on having the right branch-back address.
+	 * Note that the first few instructions should share the cache
+	 * line with the second half of the actual locked code.
+	 */
+	.macro  cmpxchg_lock, bitwidth
+
+	/* Lock; if we succeed, jump back up to the read-modify-write. */
+#ifdef CONFIG_SMP
+	tns     r21, ATOMIC_LOCK_REG_NAME
+#else
+	/*
+	 * Non-SMP preserves all the lock infrastructure, to keep the
+	 * code simpler for the interesting (SMP) case.  However, we do
+	 * one small optimization here and in atomic_asm.S, which is
+	 * to fake out acquiring the actual lock in the atomic_lock table.
+	 */
+	movei	r21, 0
+#endif
+
+	/* Issue the slow SPR here while the tns result is in flight. */
+	mfspr   r28, EX_CONTEXT_1_0
+
+	{
+	 addi   r28, r28, 8    /* return to the instruction after the swint1 */
+	 bzt    r21, .Ldo_cmpxchg\bitwidth
+	}
+	/*
+	 * The preceding instruction is the last thing that must be
+	 * on the second cache line.
+	 */
+
+#ifdef CONFIG_SMP
+	/*
+	 * We failed to acquire the tns lock on our first try.  Now use
+	 * bounded exponential backoff to retry, like __atomic_spinlock().
+	 */
+	{
+	 moveli r23, 2048       /* maximum backoff time in cycles */
+	 moveli r25, 32         /* starting backoff time in cycles */
+	}
+1:	mfspr   r26, CYCLE_LOW  /* get start point for this backoff */
+2:	mfspr   r22, CYCLE_LOW  /* test to see if we've backed off enough */
+	sub     r22, r22, r26
+	slt     r22, r22, r25
+	bbst    r22, 2b
+	{
+	 shli   r25, r25, 1     /* double the backoff; retry the tns */
+	 tns    r21, ATOMIC_LOCK_REG_NAME
+	}
+	slt     r26, r23, r25   /* is the proposed backoff too big? */
+	{
+	 mvnz   r25, r26, r23
+	 bzt    r21, .Ldo_cmpxchg\bitwidth
+	}
+	j       1b
+#endif /* CONFIG_SMP */
+	.endm
+
+.Lcmpxchg32_tns:
+	cmpxchg_lock 32
+
+	/*
+	 * This code is invoked from sys_cmpxchg after most of the
+	 * preconditions have been checked.  We still need to check
+	 * that r0 is 8-byte aligned, since if it's not we won't
+	 * actually be atomic.  However, ATOMIC_LOCK_REG has the atomic
+	 * lock pointer and r27/r28 have the saved SP/PC.
+	 * r23 is holding "r0 & 7" so we can test for alignment.
+	 * The compare value is in r2/r3; the new value is in r4/r5.
+	 * On return, we must put the old value in r0/r1.
+	 */
+	.align 64
+.Lcmpxchg64:
+	{
+#if ATOMIC_LOCKS_FOUND_VIA_TABLE()
+	 s2a	ATOMIC_LOCK_REG_NAME, r25, r21
+#endif
+	 bzt     r23, .Lcmpxchg64_tns
+	}
+	j       .Lcmpxchg_badaddr
+
+.Ldo_cmpxchg64:
+	{
+	 lw     r21, r0
+	 addi   r25, r0, 4
+	}
+	{
+	 lw     r1, r25
+	}
+	seq     r26, r21, r2
+	{
+	 bz     r26, .Lcmpxchg64_mismatch
+	 seq    r26, r1, r3
+	}
+	{
+	 bz     r26, .Lcmpxchg64_mismatch
+	}
+	sw      r0, r4
+	sw      r25, r5
+
+	/*
+	 * The 32-bit path provides optimized "match" and "mismatch"
+	 * iret paths, but we don't have enough bundles in this cache line
+	 * to do that, so we just make even the "mismatch" path do an "mf".
+	 */
+.Lcmpxchg64_mismatch:
+	{
+	 move   sp, r27
+	 mtspr  EX_CONTEXT_1_0, r28
+	}
+	mf
+	{
+	 move   r0, r21
+	 sw     ATOMIC_LOCK_REG_NAME, zero
+	}
+	iret
+
+.Lcmpxchg64_tns:
+	cmpxchg_lock 64
+
+
+	/*
+	 * Reset sp and revector to sys_cmpxchg_badaddr(), which will
+	 * just raise the appropriate signal and exit.  Doing it this
+	 * way means we don't have to duplicate the code in intvec.S's
+	 * int_hand macro that locates the top of the stack.
+	 */
+.Lcmpxchg_badaddr:
+	{
+	 moveli TREG_SYSCALL_NR_NAME, __NR_cmpxchg_badaddr
+	 move   sp, r27
+	}
+	j       intvec_SWINT_1
+	ENDPROC(sys_cmpxchg)
+	ENTRY(__sys_cmpxchg_end)
+
+
+/* The single-step support may need to read all the registers. */
+int_unalign:
+	push_extra_callee_saves r0
+	j       do_trap
+
+/* Include .intrpt1 array of interrupt vectors */
+	.section ".intrpt1", "ax"
+
+#define op_handle_perf_interrupt bad_intr
+#define op_handle_aux_perf_interrupt bad_intr
+
+#define do_hardwall_trap bad_intr
+
+	int_hand     INT_ITLB_MISS, ITLB_MISS, \
+		     do_page_fault, handle_interrupt_no_single_step
+	int_hand     INT_MEM_ERROR, MEM_ERROR, bad_intr
+	int_hand     INT_ILL, ILL, do_trap, handle_ill
+	int_hand     INT_GPV, GPV, do_trap
+	int_hand     INT_SN_ACCESS, SN_ACCESS, do_trap
+	int_hand     INT_IDN_ACCESS, IDN_ACCESS, do_trap
+	int_hand     INT_UDN_ACCESS, UDN_ACCESS, do_trap
+	int_hand     INT_IDN_REFILL, IDN_REFILL, bad_intr
+	int_hand     INT_UDN_REFILL, UDN_REFILL, bad_intr
+	int_hand     INT_IDN_COMPLETE, IDN_COMPLETE, bad_intr
+	int_hand     INT_UDN_COMPLETE, UDN_COMPLETE, bad_intr
+	int_hand     INT_SWINT_3, SWINT_3, do_trap
+	int_hand     INT_SWINT_2, SWINT_2, do_trap
+	int_hand     INT_SWINT_1, SWINT_1, SYSCALL, handle_syscall
+	int_hand     INT_SWINT_0, SWINT_0, do_trap
+	int_hand     INT_UNALIGN_DATA, UNALIGN_DATA, int_unalign
+	int_hand     INT_DTLB_MISS, DTLB_MISS, do_page_fault
+	int_hand     INT_DTLB_ACCESS, DTLB_ACCESS, do_page_fault
+	int_hand     INT_DMATLB_MISS, DMATLB_MISS, do_page_fault
+	int_hand     INT_DMATLB_ACCESS, DMATLB_ACCESS, do_page_fault
+	int_hand     INT_SNITLB_MISS, SNITLB_MISS, do_page_fault
+	int_hand     INT_SN_NOTIFY, SN_NOTIFY, bad_intr
+	int_hand     INT_SN_FIREWALL, SN_FIREWALL, do_hardwall_trap
+	int_hand     INT_IDN_FIREWALL, IDN_FIREWALL, bad_intr
+	int_hand     INT_UDN_FIREWALL, UDN_FIREWALL, do_hardwall_trap
+	int_hand     INT_TILE_TIMER, TILE_TIMER, do_timer_interrupt
+	int_hand     INT_IDN_TIMER, IDN_TIMER, bad_intr
+	int_hand     INT_UDN_TIMER, UDN_TIMER, bad_intr
+	int_hand     INT_DMA_NOTIFY, DMA_NOTIFY, bad_intr
+	int_hand     INT_IDN_CA, IDN_CA, bad_intr
+	int_hand     INT_UDN_CA, UDN_CA, bad_intr
+	int_hand     INT_IDN_AVAIL, IDN_AVAIL, bad_intr
+	int_hand     INT_UDN_AVAIL, UDN_AVAIL, bad_intr
+	int_hand     INT_PERF_COUNT, PERF_COUNT, \
+		     op_handle_perf_interrupt, handle_nmi
+	int_hand     INT_INTCTRL_3, INTCTRL_3, bad_intr
+	int_hand     INT_INTCTRL_2, INTCTRL_2, bad_intr
+	dc_dispatch  INT_INTCTRL_1, INTCTRL_1
+	int_hand     INT_INTCTRL_0, INTCTRL_0, bad_intr
+	int_hand     INT_MESSAGE_RCV_DWNCL, MESSAGE_RCV_DWNCL, \
+		     hv_message_intr, handle_interrupt_downcall
+	int_hand     INT_DEV_INTR_DWNCL, DEV_INTR_DWNCL, \
+		     tile_dev_intr, handle_interrupt_downcall
+	int_hand     INT_I_ASID, I_ASID, bad_intr
+	int_hand     INT_D_ASID, D_ASID, bad_intr
+	int_hand     INT_DMATLB_MISS_DWNCL, DMATLB_MISS_DWNCL, \
+		     do_page_fault, handle_interrupt_downcall
+	int_hand     INT_SNITLB_MISS_DWNCL, SNITLB_MISS_DWNCL, \
+		     do_page_fault, handle_interrupt_downcall
+	int_hand     INT_DMATLB_ACCESS_DWNCL, DMATLB_ACCESS_DWNCL, \
+		     do_page_fault, handle_interrupt_downcall
+	int_hand     INT_SN_CPL, SN_CPL, bad_intr
+	int_hand     INT_DOUBLE_FAULT, DOUBLE_FAULT, do_trap
+#if CHIP_HAS_AUX_PERF_COUNTERS()
+	int_hand     INT_AUX_PERF_COUNT, AUX_PERF_COUNT, \
+		     op_handle_aux_perf_interrupt, handle_nmi
+#endif
+
+	/* Synthetic interrupt delivered only by the simulator */
+	int_hand     INT_BREAKPOINT, BREAKPOINT, do_breakpoint
diff --git a/arch/tile/kernel/irq.c b/arch/tile/kernel/irq.c
new file mode 100644
index 0000000..24cc6b2
--- /dev/null
+++ b/arch/tile/kernel/irq.c
@@ -0,0 +1,227 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#include <linux/module.h>
+#include <linux/seq_file.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel_stat.h>
+#include <linux/uaccess.h>
+#include <hv/drv_pcie_rc_intf.h>
+
+/*
+ * The set of interrupts we enable for raw_local_irq_enable().
+ * This is initialized to have just a single interrupt that the kernel
+ * doesn't actually use as a sentinel.  During kernel init,
+ * interrupts are added as the kernel gets prepared to support them.
+ * NOTE: we could probably initialize them all statically up front.
+ */
+DEFINE_PER_CPU(unsigned long long, interrupts_enabled_mask) =
+  INITIAL_INTERRUPTS_ENABLED;
+EXPORT_PER_CPU_SYMBOL(interrupts_enabled_mask);
+
+/* Define per-tile device interrupt state */
+DEFINE_PER_CPU(HV_IntrState, dev_intr_state);
+
+DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp;
+EXPORT_PER_CPU_SYMBOL(irq_stat);
+
+
+
+/*
+ * Interrupt dispatcher, invoked upon a hypervisor device interrupt downcall
+ */
+void tile_dev_intr(struct pt_regs *regs, int intnum)
+{
+	int irq;
+
+	/*
+	 * Get the device interrupt pending mask from where the hypervisor
+	 * has tucked it away for us.
+	 */
+	unsigned long pending_dev_intr_mask = __insn_mfspr(SPR_SYSTEM_SAVE_1_3);
+
+
+	/* Track time spent here in an interrupt context. */
+	struct pt_regs *old_regs = set_irq_regs(regs);
+	irq_enter();
+
+#ifdef CONFIG_DEBUG_STACKOVERFLOW
+	/* Debugging check for stack overflow: less than 1/8th stack free? */
+	{
+		long sp = stack_pointer - (long) current_thread_info();
+		if (unlikely(sp < (sizeof(struct thread_info) + STACK_WARN))) {
+			printk(KERN_EMERG "tile_dev_intr: "
+			       "stack overflow: %ld\n",
+			       sp - sizeof(struct thread_info));
+			dump_stack();
+		}
+	}
+#endif
+
+	for (irq = 0; pending_dev_intr_mask; ++irq) {
+		if (pending_dev_intr_mask & 0x1) {
+			generic_handle_irq(irq);
+
+			/* Count device irqs; IPIs are counted elsewhere. */
+			if (irq > HV_MAX_IPI_INTERRUPT)
+				__get_cpu_var(irq_stat).irq_dev_intr_count++;
+		}
+		pending_dev_intr_mask >>= 1;
+	}
+
+	/*
+	 * Track time spent against the current process again and
+	 * process any softirqs if they are waiting.
+	 */
+	irq_exit();
+	set_irq_regs(old_regs);
+}
+
+
+/* Mask an interrupt. */
+static void hv_dev_irq_mask(unsigned int irq)
+{
+	HV_IntrState *p_intr_state = &__get_cpu_var(dev_intr_state);
+	hv_disable_intr(p_intr_state, 1 << irq);
+}
+
+/* Unmask an interrupt. */
+static void hv_dev_irq_unmask(unsigned int irq)
+{
+	/* Re-enable the hypervisor to generate interrupts. */
+	HV_IntrState *p_intr_state = &__get_cpu_var(dev_intr_state);
+	hv_enable_intr(p_intr_state, 1 << irq);
+}
+
+/*
+ * The HV doesn't latch incoming interrupts while an interrupt is
+ * disabled, so we need to reenable interrupts before running the
+ * handler.
+ *
+ * ISSUE: Enabling the interrupt this early avoids any race conditions
+ * but introduces the possibility of nested interrupt stack overflow.
+ * An imminent change to the HV IRQ model will fix this.
+ */
+static void hv_dev_irq_ack(unsigned int irq)
+{
+	hv_dev_irq_unmask(irq);
+}
+
+/*
+ * Since ack() reenables interrupts, there's nothing to do at eoi().
+ */
+static void hv_dev_irq_eoi(unsigned int irq)
+{
+}
+
+static struct irq_chip hv_dev_irq_chip = {
+	.typename = "hv_dev_irq_chip",
+	.ack = hv_dev_irq_ack,
+	.mask = hv_dev_irq_mask,
+	.unmask = hv_dev_irq_unmask,
+	.eoi = hv_dev_irq_eoi,
+};
+
+static struct irqaction resched_action = {
+	.handler = handle_reschedule_ipi,
+	.name = "resched",
+	.dev_id = handle_reschedule_ipi /* unique token */,
+};
+
+void __init init_IRQ(void)
+{
+	/* Bind IPI irqs. Does this belong somewhere else in init? */
+	tile_irq_activate(IRQ_RESCHEDULE);
+	BUG_ON(setup_irq(IRQ_RESCHEDULE, &resched_action));
+}
+
+void __cpuinit init_per_tile_IRQs(void)
+{
+	int rc;
+
+	/* Set the pointer to the per-tile device interrupt state. */
+	HV_IntrState *sv_ptr = &__get_cpu_var(dev_intr_state);
+	rc = hv_dev_register_intr_state(sv_ptr);
+	if (rc != HV_OK)
+		panic("hv_dev_register_intr_state: error %d", rc);
+
+}
+
+void tile_irq_activate(unsigned int irq)
+{
+	/*
+	 * Paravirtualized drivers can call up to the HV to find out
+	 * which irq they're associated with.  The HV interface
+	 * doesn't provide a generic call for discovering all valid
+	 * IRQs, so drivers must call this method to initialize newly
+	 * discovered IRQs.
+	 *
+	 * We could also just initialize all 32 IRQs at startup, but
+	 * doing so would lead to a kernel fault if an unexpected
+	 * interrupt fires and jumps to a NULL action.  By defering
+	 * the set_irq_chip_and_handler() call, unexpected IRQs are
+	 * handled properly by handle_bad_irq().
+	 */
+	hv_dev_irq_mask(irq);
+	set_irq_chip_and_handler(irq, &hv_dev_irq_chip, handle_percpu_irq);
+}
+
+void ack_bad_irq(unsigned int irq)
+{
+	printk(KERN_ERR "unexpected IRQ trap at vector %02x\n", irq);
+}
+
+/*
+ * Generic, controller-independent functions:
+ */
+
+int show_interrupts(struct seq_file *p, void *v)
+{
+	int i = *(loff_t *) v, j;
+	struct irqaction *action;
+	unsigned long flags;
+
+	if (i == 0) {
+		seq_printf(p, "           ");
+		for (j = 0; j < NR_CPUS; j++)
+			if (cpu_online(j))
+				seq_printf(p, "CPU%-8d", j);
+		seq_putc(p, '\n');
+	}
+
+	if (i < NR_IRQS) {
+		raw_spin_lock_irqsave(&irq_desc[i].lock, flags);
+		action = irq_desc[i].action;
+		if (!action)
+			goto skip;
+		seq_printf(p, "%3d: ", i);
+#ifndef CONFIG_SMP
+		seq_printf(p, "%10u ", kstat_irqs(i));
+#else
+		for_each_online_cpu(j)
+			seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
+#endif
+		seq_printf(p, " %14s", irq_desc[i].chip->typename);
+		seq_printf(p, "  %s", action->name);
+
+		for (action = action->next; action; action = action->next)
+			seq_printf(p, ", %s", action->name);
+
+		seq_putc(p, '\n');
+skip:
+		raw_spin_unlock_irqrestore(&irq_desc[i].lock, flags);
+	}
+	return 0;
+}
diff --git a/arch/tile/kernel/machine_kexec.c b/arch/tile/kernel/machine_kexec.c
new file mode 100644
index 0000000..ed3e1cb
--- /dev/null
+++ b/arch/tile/kernel/machine_kexec.c
@@ -0,0 +1,291 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ * based on machine_kexec.c from other architectures in linux-2.6.18
+ */
+
+#include <linux/mm.h>
+#include <linux/kexec.h>
+#include <linux/delay.h>
+#include <linux/reboot.h>
+#include <linux/errno.h>
+#include <linux/vmalloc.h>
+#include <linux/cpumask.h>
+#include <linux/kernel.h>
+#include <linux/elf.h>
+#include <linux/highmem.h>
+#include <linux/mmu_context.h>
+#include <linux/io.h>
+#include <linux/timex.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/cacheflush.h>
+#include <asm/checksum.h>
+#include <hv/hypervisor.h>
+
+
+/*
+ * This stuff is not in elf.h and is not in any other kernel include.
+ * This stuff is needed below in the little boot notes parser to
+ * extract the command line so we can pass it to the hypervisor.
+ */
+struct Elf32_Bhdr {
+	Elf32_Word b_signature;
+	Elf32_Word b_size;
+	Elf32_Half b_checksum;
+	Elf32_Half b_records;
+};
+#define ELF_BOOT_MAGIC		0x0E1FB007
+#define EBN_COMMAND_LINE	0x00000004
+#define roundupsz(X) (((X) + 3) & ~3)
+
+/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+
+void machine_shutdown(void)
+{
+	/*
+	 * Normally we would stop all the other processors here, but
+	 * the check in machine_kexec_prepare below ensures we'll only
+	 * get this far if we've been booted with "nosmp" on the
+	 * command line or without CONFIG_SMP so there's nothing to do
+	 * here (for now).
+	 */
+}
+
+void machine_crash_shutdown(struct pt_regs *regs)
+{
+	/*
+	 * Cannot happen.  This type of kexec is disabled on this
+	 * architecture (and enforced in machine_kexec_prepare below).
+	 */
+}
+
+
+int machine_kexec_prepare(struct kimage *image)
+{
+	if (num_online_cpus() > 1) {
+		printk(KERN_WARNING "%s: detected attempt to kexec "
+		       "with num_online_cpus() > 1\n",
+		       __func__);
+		return -ENOSYS;
+	}
+	if (image->type != KEXEC_TYPE_DEFAULT) {
+		printk(KERN_WARNING "%s: detected attempt to kexec "
+		       "with unsupported type: %d\n",
+		       __func__,
+		       image->type);
+		return -ENOSYS;
+	}
+	return 0;
+}
+
+void machine_kexec_cleanup(struct kimage *image)
+{
+	/*
+	 * We did nothing in machine_kexec_prepare,
+	 * so we have nothing to do here.
+	 */
+}
+
+/*
+ * If we can find elf boot notes on this page, return the command
+ * line.  Otherwise, silently return null.  Somewhat kludgy, but no
+ * good way to do this without significantly rearchitecting the
+ * architecture-independent kexec code.
+ */
+
+static unsigned char *kexec_bn2cl(void *pg)
+{
+	struct Elf32_Bhdr *bhdrp;
+	Elf32_Nhdr *nhdrp;
+	unsigned char *desc;
+	unsigned char *command_line;
+	__sum16 csum;
+
+	bhdrp = (struct Elf32_Bhdr *) pg;
+
+	/*
+	 * This routine is invoked for every source page, so make
+	 * sure to quietly ignore every impossible page.
+	 */
+	if (bhdrp->b_signature != ELF_BOOT_MAGIC ||
+	    bhdrp->b_size > PAGE_SIZE)
+		return 0;
+
+	/*
+	 * If we get a checksum mismatch, it's possible that this is
+	 * just a false positive, but relatively unlikely.  We dump
+	 * out the contents of the section so we can diagnose better.
+	 */
+	csum = ip_compute_csum(pg, bhdrp->b_size);
+	if (csum != 0) {
+		int i;
+		unsigned char *p = pg;
+		int nbytes = min((Elf32_Word)1000, bhdrp->b_size);
+		printk(KERN_INFO "%s: bad checksum %#x\n", __func__, csum);
+		printk(KERN_INFO "bytes (%d):", bhdrp->b_size);
+		for (i = 0; i < nbytes; ++i)
+			printk(" %02x", p[i]);
+		if (bhdrp->b_size != nbytes)
+			printk(" ...");
+		printk("\n");
+		return 0;
+	}
+
+	nhdrp = (Elf32_Nhdr *) (bhdrp + 1);
+
+	while (nhdrp->n_type != EBN_COMMAND_LINE) {
+
+		desc = (unsigned char *) (nhdrp + 1);
+		desc += roundupsz(nhdrp->n_descsz);
+
+		nhdrp = (Elf32_Nhdr *) desc;
+
+		/* still in bounds? */
+		if ((unsigned char *) (nhdrp + 1) >
+		    ((unsigned char *) pg) + bhdrp->b_size) {
+
+			printk(KERN_INFO "%s: out of bounds\n", __func__);
+			return 0;
+		}
+	}
+
+	command_line = (unsigned char *) (nhdrp + 1);
+	desc = command_line;
+
+	while (*desc != '\0') {
+		desc++;
+		if (((unsigned long)desc & PAGE_MASK) != (unsigned long)pg) {
+			printk(KERN_INFO "%s: ran off end of page\n",
+			       __func__);
+			return 0;
+		}
+	}
+
+	return command_line;
+}
+
+static void kexec_find_and_set_command_line(struct kimage *image)
+{
+	kimage_entry_t *ptr, entry;
+
+	unsigned char *command_line = 0;
+	unsigned char *r;
+	HV_Errno hverr;
+
+	for (ptr = &image->head;
+	     (entry = *ptr) && !(entry & IND_DONE);
+	     ptr = (entry & IND_INDIRECTION) ?
+		     phys_to_virt((entry & PAGE_MASK)) : ptr + 1) {
+
+		if ((entry & IND_SOURCE)) {
+			void *va =
+				kmap_atomic_pfn(entry >> PAGE_SHIFT, KM_USER0);
+			r = kexec_bn2cl(va);
+			if (r) {
+				command_line = r;
+				break;
+			}
+			kunmap_atomic(va, KM_USER0);
+		}
+	}
+
+	if (command_line != 0) {
+		printk(KERN_INFO "setting new command line to \"%s\"\n",
+		       command_line);
+
+		hverr = hv_set_command_line(
+			(HV_VirtAddr) command_line, strlen(command_line));
+		kunmap_atomic(command_line, KM_USER0);
+	} else {
+		printk(KERN_INFO "%s: no command line found; making empty\n",
+		       __func__);
+		hverr = hv_set_command_line((HV_VirtAddr) command_line, 0);
+	}
+	if (hverr) {
+		printk(KERN_WARNING
+		      "%s: call to hv_set_command_line returned error: %d\n",
+		      __func__, hverr);
+
+	}
+}
+
+/*
+ * The kexec code range-checks all its PAs, so to avoid having it run
+ * amok and allocate memory and then sequester it from every other
+ * controller, we force it to come from controller zero.  We also
+ * disable the oom-killer since if we do end up running out of memory,
+ * that almost certainly won't help.
+ */
+struct page *kimage_alloc_pages_arch(gfp_t gfp_mask, unsigned int order)
+{
+	gfp_mask |= __GFP_THISNODE | __GFP_NORETRY;
+	return alloc_pages_node(0, gfp_mask, order);
+}
+
+static void setup_quasi_va_is_pa(void)
+{
+	HV_PTE *pgtable;
+	HV_PTE pte;
+	int i;
+
+	/*
+	 * Flush our TLB to prevent conflicts between the previous contents
+	 * and the new stuff we're about to add.
+	 */
+	local_flush_tlb_all();
+
+	/* setup VA is PA, at least up to PAGE_OFFSET */
+
+	pgtable = (HV_PTE *)current->mm->pgd;
+	pte = hv_pte(_PAGE_KERNEL | _PAGE_HUGE_PAGE);
+	pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_NO_L3);
+
+	for (i = 0; i < pgd_index(PAGE_OFFSET); i++)
+		pgtable[i] = pfn_pte(i << (HPAGE_SHIFT - PAGE_SHIFT), pte);
+}
+
+
+NORET_TYPE void machine_kexec(struct kimage *image)
+{
+	void *reboot_code_buffer;
+	NORET_TYPE void (*rnk)(unsigned long, void *, unsigned long)
+		ATTRIB_NORET;
+
+	/* Mask all interrupts before starting to reboot. */
+	interrupt_mask_set_mask(~0ULL);
+
+	kexec_find_and_set_command_line(image);
+
+	/*
+	 * Adjust the home caching of the control page to be cached on
+	 * this cpu, and copy the assembly helper into the control
+	 * code page, which we map in the vmalloc area.
+	 */
+	homecache_change_page_home(image->control_code_page, 0,
+				   smp_processor_id());
+	reboot_code_buffer = vmap(&image->control_code_page, 1, 0,
+				  __pgprot(_PAGE_KERNEL | _PAGE_EXECUTABLE));
+	memcpy(reboot_code_buffer, relocate_new_kernel,
+	       relocate_new_kernel_size);
+	__flush_icache_range(
+		(unsigned long) reboot_code_buffer,
+		(unsigned long) reboot_code_buffer + relocate_new_kernel_size);
+
+	setup_quasi_va_is_pa();
+
+	/* now call it */
+	rnk = reboot_code_buffer;
+	(*rnk)(image->head, reboot_code_buffer, image->start);
+}
diff --git a/arch/tile/kernel/messaging.c b/arch/tile/kernel/messaging.c
new file mode 100644
index 0000000..f991f52
--- /dev/null
+++ b/arch/tile/kernel/messaging.c
@@ -0,0 +1,115 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#include <linux/percpu.h>
+#include <linux/smp.h>
+#include <linux/hardirq.h>
+#include <linux/ptrace.h>
+#include <asm/hv_driver.h>
+#include <asm/irq_regs.h>
+#include <hv/hypervisor.h>
+#include <arch/interrupts.h>
+
+/* All messages are stored here */
+static DEFINE_PER_CPU(HV_MsgState, msg_state);
+
+void __cpuinit init_messaging()
+{
+	/* Allocate storage for messages in kernel space */
+	HV_MsgState *state = &__get_cpu_var(msg_state);
+	int rc = hv_register_message_state(state);
+	if (rc != HV_OK)
+		panic("hv_register_message_state: error %d", rc);
+
+	/* Make sure downcall interrupts will be enabled. */
+	raw_local_irq_unmask(INT_INTCTRL_1);
+}
+
+void hv_message_intr(struct pt_regs *regs, int intnum)
+{
+	/*
+	 * We enter with interrupts disabled and leave them disabled,
+	 * to match expectations of called functions (e.g.
+	 * do_ccupdate_local() in mm/slab.c).  This is also consistent
+	 * with normal call entry for device interrupts.
+	 */
+
+	int message[HV_MAX_MESSAGE_SIZE/sizeof(int)];
+	HV_RcvMsgInfo rmi;
+	int nmsgs = 0;
+
+	/* Track time spent here in an interrupt context */
+	struct pt_regs *old_regs = set_irq_regs(regs);
+	irq_enter();
+
+#ifdef CONFIG_DEBUG_STACKOVERFLOW
+	/* Debugging check for stack overflow: less than 1/8th stack free? */
+	{
+		long sp = stack_pointer - (long) current_thread_info();
+		if (unlikely(sp < (sizeof(struct thread_info) + STACK_WARN))) {
+			printk(KERN_EMERG "hv_message_intr: "
+			       "stack overflow: %ld\n",
+			       sp - sizeof(struct thread_info));
+			dump_stack();
+		}
+	}
+#endif
+
+	while (1) {
+		rmi = hv_receive_message(__get_cpu_var(msg_state),
+					 (HV_VirtAddr) message,
+					 sizeof(message));
+		if (rmi.msglen == 0)
+			break;
+
+		if (rmi.msglen < 0)
+			panic("hv_receive_message failed: %d", rmi.msglen);
+
+		++nmsgs;
+
+		if (rmi.source == HV_MSG_TILE) {
+			int tag;
+
+			/* we just send tags for now */
+			BUG_ON(rmi.msglen != sizeof(int));
+
+			tag = message[0];
+#ifdef CONFIG_SMP
+			evaluate_message(message[0]);
+#else
+			panic("Received IPI message %d in UP mode", tag);
+#endif
+		} else if (rmi.source == HV_MSG_INTR) {
+			HV_IntrMsg *him = (HV_IntrMsg *)message;
+			struct hv_driver_cb *cb =
+				(struct hv_driver_cb *)him->intarg;
+			cb->callback(cb, him->intdata);
+			__get_cpu_var(irq_stat).irq_hv_msg_count++;
+		}
+	}
+
+	/*
+	 * We shouldn't have gotten a message downcall with no
+	 * messages available.
+	 */
+	if (nmsgs == 0)
+		panic("Message downcall invoked with no messages!");
+
+	/*
+	 * Track time spent against the current process again and
+	 * process any softirqs if they are waiting.
+	 */
+	irq_exit();
+	set_irq_regs(old_regs);
+}
diff --git a/arch/tile/kernel/module.c b/arch/tile/kernel/module.c
new file mode 100644
index 0000000..ed3e911
--- /dev/null
+++ b/arch/tile/kernel/module.c
@@ -0,0 +1,257 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ * Based on i386 version, copyright (C) 2001 Rusty Russell.
+ */
+
+#include <linux/moduleloader.h>
+#include <linux/elf.h>
+#include <linux/vmalloc.h>
+#include <linux/fs.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <asm/opcode-tile.h>
+#include <asm/pgtable.h>
+
+#ifdef __tilegx__
+# define Elf_Rela Elf64_Rela
+# define ELF_R_SYM ELF64_R_SYM
+# define ELF_R_TYPE ELF64_R_TYPE
+#else
+# define Elf_Rela Elf32_Rela
+# define ELF_R_SYM ELF32_R_SYM
+# define ELF_R_TYPE ELF32_R_TYPE
+#endif
+
+#ifdef MODULE_DEBUG
+#define DEBUGP printk
+#else
+#define DEBUGP(fmt...)
+#endif
+
+/*
+ * Allocate some address space in the range MEM_MODULE_START to
+ * MEM_MODULE_END and populate it with memory.
+ */
+void *module_alloc(unsigned long size)
+{
+	struct page **pages;
+	pgprot_t prot_rwx = __pgprot(_PAGE_KERNEL | _PAGE_KERNEL_EXEC);
+	struct vm_struct *area;
+	int i = 0;
+	int npages;
+
+	if (size == 0)
+		return NULL;
+	npages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
+	pages = kmalloc(npages * sizeof(struct page *), GFP_KERNEL);
+	if (pages == NULL)
+		return NULL;
+	for (; i < npages; ++i) {
+		pages[i] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
+		if (!pages[i])
+			goto error;
+	}
+
+	area = __get_vm_area(size, VM_ALLOC, MEM_MODULE_START, MEM_MODULE_END);
+	if (!area)
+		goto error;
+
+	if (map_vm_area(area, prot_rwx, &pages)) {
+		vunmap(area->addr);
+		goto error;
+	}
+
+	return area->addr;
+
+error:
+	while (--i >= 0)
+		__free_page(pages[i]);
+	kfree(pages);
+	return NULL;
+}
+
+
+/* Free memory returned from module_alloc */
+void module_free(struct module *mod, void *module_region)
+{
+	vfree(module_region);
+	/*
+	 * FIXME: If module_region == mod->init_region, trim exception
+	 * table entries.
+	 */
+}
+
+/* We don't need anything special. */
+int module_frob_arch_sections(Elf_Ehdr *hdr,
+			      Elf_Shdr *sechdrs,
+			      char *secstrings,
+			      struct module *mod)
+{
+	return 0;
+}
+
+int apply_relocate(Elf_Shdr *sechdrs,
+		   const char *strtab,
+		   unsigned int symindex,
+		   unsigned int relsec,
+		   struct module *me)
+{
+	printk(KERN_ERR "module %s: .rel relocation unsupported\n", me->name);
+	return -ENOEXEC;
+}
+
+#ifdef __tilegx__
+/*
+ * Validate that the high 16 bits of "value" is just the sign-extension of
+ * the low 48 bits.
+ */
+static int validate_hw2_last(long value, struct module *me)
+{
+	if (((value << 16) >> 16) != value) {
+		printk("module %s: Out of range HW2_LAST value %#lx\n",
+		       me->name, value);
+		return 0;
+	}
+	return 1;
+}
+
+/*
+ * Validate that "value" isn't too big to hold in a JumpOff relocation.
+ */
+static int validate_jumpoff(long value)
+{
+	/* Determine size of jump offset. */
+	int shift = __builtin_clzl(get_JumpOff_X1(create_JumpOff_X1(-1)));
+
+	/* Check to see if it fits into the relocation slot. */
+	long f = get_JumpOff_X1(create_JumpOff_X1(value));
+	f = (f << shift) >> shift;
+
+	return f == value;
+}
+#endif
+
+int apply_relocate_add(Elf_Shdr *sechdrs,
+		       const char *strtab,
+		       unsigned int symindex,
+		       unsigned int relsec,
+		       struct module *me)
+{
+	unsigned int i;
+	Elf_Rela *rel = (void *)sechdrs[relsec].sh_addr;
+	Elf_Sym *sym;
+	u64 *location;
+	unsigned long value;
+
+	DEBUGP("Applying relocate section %u to %u\n", relsec,
+	       sechdrs[relsec].sh_info);
+	for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {
+		/* This is where to make the change */
+		location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
+			+ rel[i].r_offset;
+		/*
+		 * This is the symbol it is referring to.
+		 * Note that all undefined symbols have been resolved.
+		 */
+		sym = (Elf_Sym *)sechdrs[symindex].sh_addr
+			+ ELF_R_SYM(rel[i].r_info);
+		value = sym->st_value + rel[i].r_addend;
+
+		switch (ELF_R_TYPE(rel[i].r_info)) {
+
+#define MUNGE(func) (*location = ((*location & ~func(-1)) | func(value)))
+
+#ifndef __tilegx__
+		case R_TILE_32:
+			*(uint32_t *)location = value;
+			break;
+		case R_TILE_IMM16_X0_HA:
+			value = (value + 0x8000) >> 16;
+			/*FALLTHROUGH*/
+		case R_TILE_IMM16_X0_LO:
+			MUNGE(create_Imm16_X0);
+			break;
+		case R_TILE_IMM16_X1_HA:
+			value = (value + 0x8000) >> 16;
+			/*FALLTHROUGH*/
+		case R_TILE_IMM16_X1_LO:
+			MUNGE(create_Imm16_X1);
+			break;
+		case R_TILE_JOFFLONG_X1:
+			value -= (unsigned long) location;  /* pc-relative */
+			value = (long) value >> 3;     /* count by instrs */
+			MUNGE(create_JOffLong_X1);
+			break;
+#else
+		case R_TILEGX_64:
+			*location = value;
+			break;
+		case R_TILEGX_IMM16_X0_HW2_LAST:
+			if (!validate_hw2_last(value, me))
+				return -ENOEXEC;
+			value >>= 16;
+			/*FALLTHROUGH*/
+		case R_TILEGX_IMM16_X0_HW1:
+			value >>= 16;
+			/*FALLTHROUGH*/
+		case R_TILEGX_IMM16_X0_HW0:
+			MUNGE(create_Imm16_X0);
+			break;
+		case R_TILEGX_IMM16_X1_HW2_LAST:
+			if (!validate_hw2_last(value, me))
+				return -ENOEXEC;
+			value >>= 16;
+			/*FALLTHROUGH*/
+		case R_TILEGX_IMM16_X1_HW1:
+			value >>= 16;
+			/*FALLTHROUGH*/
+		case R_TILEGX_IMM16_X1_HW0:
+			MUNGE(create_Imm16_X1);
+			break;
+		case R_TILEGX_JUMPOFF_X1:
+			value -= (unsigned long) location;  /* pc-relative */
+			value = (long) value >> 3;     /* count by instrs */
+			if (!validate_jumpoff(value)) {
+				printk("module %s: Out of range jump to"
+				       " %#llx at %#llx (%p)\n", me->name,
+				       sym->st_value + rel[i].r_addend,
+				       rel[i].r_offset, location);
+				return -ENOEXEC;
+			}
+			MUNGE(create_JumpOff_X1);
+			break;
+#endif
+
+#undef MUNGE
+
+		default:
+			printk(KERN_ERR "module %s: Unknown relocation: %d\n",
+			       me->name, (int) ELF_R_TYPE(rel[i].r_info));
+			return -ENOEXEC;
+		}
+	}
+	return 0;
+}
+
+int module_finalize(const Elf_Ehdr *hdr,
+		    const Elf_Shdr *sechdrs,
+		    struct module *me)
+{
+	/* FIXME: perhaps remove the "writable" bit from the TLB? */
+	return 0;
+}
+
+void module_arch_cleanup(struct module *mod)
+{
+}
diff --git a/arch/tile/kernel/pci-dma.c b/arch/tile/kernel/pci-dma.c
new file mode 100644
index 0000000..1d45640
--- /dev/null
+++ b/arch/tile/kernel/pci-dma.c
@@ -0,0 +1,252 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#include <linux/mm.h>
+#include <linux/dma-mapping.h>
+#include <linux/vmalloc.h>
+#include <asm/tlbflush.h>
+#include <asm/homecache.h>
+
+/* Generic DMA mapping functions: */
+
+/*
+ * Allocate what Linux calls "coherent" memory, which for us just
+ * means uncached.
+ */
+void *dma_alloc_coherent(struct device *dev,
+			 size_t size,
+			 dma_addr_t *dma_handle,
+			 gfp_t gfp)
+{
+	u64 dma_mask = dev->coherent_dma_mask ?: DMA_BIT_MASK(32);
+	int node = dev_to_node(dev);
+	int order = get_order(size);
+	struct page *pg;
+	dma_addr_t addr;
+
+	/* Set GFP_KERNEL to ensure we have memory with a kernel VA. */
+	gfp |= GFP_KERNEL | __GFP_ZERO;
+
+	/*
+	 * By forcing NUMA node 0 for 32-bit masks we ensure that the
+	 * high 32 bits of the resulting PA will be zero.  If the mask
+	 * size is, e.g., 24, we may still not be able to guarantee a
+	 * suitable memory address, in which case we will return NULL.
+	 * But such devices are uncommon.
+	 */
+	if (dma_mask <= DMA_BIT_MASK(32))
+		node = 0;
+
+	pg = homecache_alloc_pages_node(node, gfp, order, PAGE_HOME_UNCACHED);
+	if (pg == NULL)
+		return NULL;
+
+	addr = page_to_phys(pg);
+	if (addr + size > dma_mask) {
+		homecache_free_pages(addr, order);
+		return NULL;
+	}
+
+	*dma_handle = addr;
+	return page_address(pg);
+}
+EXPORT_SYMBOL(dma_alloc_coherent);
+
+/*
+ * Free memory that was allocated with dma_alloc_coherent.
+ */
+void dma_free_coherent(struct device *dev, size_t size,
+		  void *vaddr, dma_addr_t dma_handle)
+{
+	homecache_free_pages((unsigned long)vaddr, get_order(size));
+}
+EXPORT_SYMBOL(dma_free_coherent);
+
+/*
+ * The map routines "map" the specified address range for DMA
+ * accesses.  The memory belongs to the device after this call is
+ * issued, until it is unmapped with dma_unmap_single.
+ *
+ * We don't need to do any mapping, we just flush the address range
+ * out of the cache and return a DMA address.
+ *
+ * The unmap routines do whatever is necessary before the processor
+ * accesses the memory again, and must be called before the driver
+ * touches the memory.  We can get away with a cache invalidate if we
+ * can count on nothing having been touched.
+ */
+
+
+/*
+ * dma_map_single can be passed any memory address, and there appear
+ * to be no alignment constraints.
+ *
+ * There is a chance that the start of the buffer will share a cache
+ * line with some other data that has been touched in the meantime.
+ */
+dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size,
+	       enum dma_data_direction direction)
+{
+	struct page *page;
+	dma_addr_t dma_addr;
+	int thispage;
+
+	BUG_ON(!valid_dma_direction(direction));
+	WARN_ON(size == 0);
+
+	dma_addr = __pa(ptr);
+
+	/* We might have been handed a buffer that wraps a page boundary */
+	while ((int)size > 0) {
+		/* The amount to flush that's on this page */
+		thispage = PAGE_SIZE - ((unsigned long)ptr & (PAGE_SIZE - 1));
+		thispage = min((int)thispage, (int)size);
+		/* Is this valid for any page we could be handed? */
+		page = pfn_to_page(kaddr_to_pfn(ptr));
+		homecache_flush_cache(page, 0);
+		ptr += thispage;
+		size -= thispage;
+	}
+
+	return dma_addr;
+}
+EXPORT_SYMBOL(dma_map_single);
+
+void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
+		 enum dma_data_direction direction)
+{
+	BUG_ON(!valid_dma_direction(direction));
+}
+EXPORT_SYMBOL(dma_unmap_single);
+
+int dma_map_sg(struct device *dev, struct scatterlist *sglist, int nents,
+	   enum dma_data_direction direction)
+{
+	struct scatterlist *sg;
+	int i;
+
+	BUG_ON(!valid_dma_direction(direction));
+
+	WARN_ON(nents == 0 || sglist->length == 0);
+
+	for_each_sg(sglist, sg, nents, i) {
+		struct page *page;
+		sg->dma_address = sg_phys(sg);
+		page = pfn_to_page(sg->dma_address >> PAGE_SHIFT);
+		homecache_flush_cache(page, 0);
+	}
+
+	return nents;
+}
+EXPORT_SYMBOL(dma_map_sg);
+
+void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries,
+	     enum dma_data_direction direction)
+{
+	BUG_ON(!valid_dma_direction(direction));
+}
+EXPORT_SYMBOL(dma_unmap_sg);
+
+dma_addr_t dma_map_page(struct device *dev, struct page *page,
+			unsigned long offset, size_t size,
+			enum dma_data_direction direction)
+{
+	BUG_ON(!valid_dma_direction(direction));
+
+	homecache_flush_cache(page, 0);
+
+	return page_to_pa(page) + offset;
+}
+EXPORT_SYMBOL(dma_map_page);
+
+void dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
+	       enum dma_data_direction direction)
+{
+	BUG_ON(!valid_dma_direction(direction));
+}
+EXPORT_SYMBOL(dma_unmap_page);
+
+void dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle,
+			     size_t size, enum dma_data_direction direction)
+{
+	BUG_ON(!valid_dma_direction(direction));
+}
+EXPORT_SYMBOL(dma_sync_single_for_cpu);
+
+void dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle,
+				size_t size, enum dma_data_direction direction)
+{
+	unsigned long start = PFN_DOWN(dma_handle);
+	unsigned long end = PFN_DOWN(dma_handle + size - 1);
+	unsigned long i;
+
+	BUG_ON(!valid_dma_direction(direction));
+	for (i = start; i <= end; ++i)
+		homecache_flush_cache(pfn_to_page(i), 0);
+}
+EXPORT_SYMBOL(dma_sync_single_for_device);
+
+void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems,
+		    enum dma_data_direction direction)
+{
+	BUG_ON(!valid_dma_direction(direction));
+	WARN_ON(nelems == 0 || sg[0].length == 0);
+}
+EXPORT_SYMBOL(dma_sync_sg_for_cpu);
+
+/*
+ * Flush and invalidate cache for scatterlist.
+ */
+void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sglist,
+			    int nelems, enum dma_data_direction direction)
+{
+	struct scatterlist *sg;
+	int i;
+
+	BUG_ON(!valid_dma_direction(direction));
+	WARN_ON(nelems == 0 || sglist->length == 0);
+
+	for_each_sg(sglist, sg, nelems, i) {
+		dma_sync_single_for_device(dev, sg->dma_address,
+					   sg_dma_len(sg), direction);
+	}
+}
+EXPORT_SYMBOL(dma_sync_sg_for_device);
+
+void dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t dma_handle,
+				   unsigned long offset, size_t size,
+				   enum dma_data_direction direction)
+{
+	dma_sync_single_for_cpu(dev, dma_handle + offset, size, direction);
+}
+EXPORT_SYMBOL(dma_sync_single_range_for_cpu);
+
+void dma_sync_single_range_for_device(struct device *dev,
+				      dma_addr_t dma_handle,
+				      unsigned long offset, size_t size,
+				      enum dma_data_direction direction)
+{
+	dma_sync_single_for_device(dev, dma_handle + offset, size, direction);
+}
+EXPORT_SYMBOL(dma_sync_single_range_for_device);
+
+/*
+ * dma_alloc_noncoherent() returns non-cacheable memory, so there's no
+ * need to do any flushing here.
+ */
+void dma_cache_sync(void *vaddr, size_t size,
+		    enum dma_data_direction direction)
+{
+}
+EXPORT_SYMBOL(dma_cache_sync);
diff --git a/arch/tile/kernel/proc.c b/arch/tile/kernel/proc.c
new file mode 100644
index 0000000..92ef925
--- /dev/null
+++ b/arch/tile/kernel/proc.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#include <linux/smp.h>
+#include <linux/seq_file.h>
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/timex.h>
+#include <linux/delay.h>
+#include <linux/fs.h>
+#include <linux/proc_fs.h>
+#include <linux/sysctl.h>
+#include <linux/hardirq.h>
+#include <linux/mman.h>
+#include <linux/smp.h>
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/sections.h>
+#include <asm/homecache.h>
+#include <arch/chip.h>
+
+
+/*
+ * Support /proc/cpuinfo
+ */
+
+#define cpu_to_ptr(n) ((void *)((long)(n)+1))
+#define ptr_to_cpu(p) ((long)(p) - 1)
+
+static int show_cpuinfo(struct seq_file *m, void *v)
+{
+	int n = ptr_to_cpu(v);
+
+	if (n == 0) {
+		char buf[NR_CPUS*5];
+		cpulist_scnprintf(buf, sizeof(buf), cpu_online_mask);
+		seq_printf(m, "cpu count\t: %d\n", num_online_cpus());
+		seq_printf(m, "cpu list\t: %s\n", buf);
+		seq_printf(m, "model name\t: %s\n", chip_model);
+		seq_printf(m, "flags\t\t:\n");  /* nothing for now */
+		seq_printf(m, "cpu MHz\t\t: %llu.%06llu\n",
+			   get_clock_rate() / 1000000,
+			   (get_clock_rate() % 1000000));
+		seq_printf(m, "bogomips\t: %lu.%02lu\n\n",
+			   loops_per_jiffy/(500000/HZ),
+			   (loops_per_jiffy/(5000/HZ)) % 100);
+	}
+
+#ifdef CONFIG_SMP
+	if (!cpu_online(n))
+		return 0;
+#endif
+
+	seq_printf(m, "processor\t: %d\n", n);
+
+	/* Print only num_online_cpus() blank lines total. */
+	if (cpumask_next(n, cpu_online_mask) < nr_cpu_ids)
+		seq_printf(m, "\n");
+
+	return 0;
+}
+
+static void *c_start(struct seq_file *m, loff_t *pos)
+{
+	return *pos < nr_cpu_ids ? cpu_to_ptr(*pos) : NULL;
+}
+static void *c_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	++*pos;
+	return c_start(m, pos);
+}
+static void c_stop(struct seq_file *m, void *v)
+{
+}
+const struct seq_operations cpuinfo_op = {
+	.start	= c_start,
+	.next	= c_next,
+	.stop	= c_stop,
+	.show	= show_cpuinfo,
+};
diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c
new file mode 100644
index 0000000..824f230
--- /dev/null
+++ b/arch/tile/kernel/process.c
@@ -0,0 +1,647 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#include <linux/sched.h>
+#include <linux/preempt.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/kprobes.h>
+#include <linux/elfcore.h>
+#include <linux/tick.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/compat.h>
+#include <linux/hardirq.h>
+#include <linux/syscalls.h>
+#include <asm/system.h>
+#include <asm/stack.h>
+#include <asm/homecache.h>
+#include <arch/chip.h>
+#include <arch/abi.h>
+
+
+/*
+ * Use the (x86) "idle=poll" option to prefer low latency when leaving the
+ * idle loop over low power while in the idle loop, e.g. if we have
+ * one thread per core and we want to get threads out of futex waits fast.
+ */
+static int no_idle_nap;
+static int __init idle_setup(char *str)
+{
+	if (!str)
+		return -EINVAL;
+
+	if (!strcmp(str, "poll")) {
+		printk("using polling idle threads.\n");
+		no_idle_nap = 1;
+	} else if (!strcmp(str, "halt"))
+		no_idle_nap = 0;
+	else
+		return -1;
+
+	return 0;
+}
+early_param("idle", idle_setup);
+
+/*
+ * The idle thread. There's no useful work to be
+ * done, so just try to conserve power and have a
+ * low exit latency (ie sit in a loop waiting for
+ * somebody to say that they'd like to reschedule)
+ */
+void cpu_idle(void)
+{
+	extern void _cpu_idle(void);
+	int cpu = smp_processor_id();
+
+
+	current_thread_info()->status |= TS_POLLING;
+
+	if (no_idle_nap) {
+		while (1) {
+			while (!need_resched())
+				cpu_relax();
+			schedule();
+		}
+	}
+
+	/* endless idle loop with no priority at all */
+	while (1) {
+		tick_nohz_stop_sched_tick(1);
+		while (!need_resched()) {
+			if (cpu_is_offline(cpu))
+				BUG();  /* no HOTPLUG_CPU */
+
+			local_irq_disable();
+			__get_cpu_var(irq_stat).idle_timestamp = jiffies;
+			current_thread_info()->status &= ~TS_POLLING;
+			/*
+			 * TS_POLLING-cleared state must be visible before we
+			 * test NEED_RESCHED:
+			 */
+			smp_mb();
+
+			if (!need_resched())
+				_cpu_idle();
+			else
+				local_irq_enable();
+			current_thread_info()->status |= TS_POLLING;
+		}
+		tick_nohz_restart_sched_tick();
+		preempt_enable_no_resched();
+		schedule();
+		preempt_disable();
+	}
+}
+
+struct thread_info *alloc_thread_info(struct task_struct *task)
+{
+	struct page *page;
+	int flags = GFP_KERNEL;
+
+#ifdef CONFIG_DEBUG_STACK_USAGE
+	flags |= __GFP_ZERO;
+#endif
+
+	page = alloc_pages(flags, THREAD_SIZE_ORDER);
+	if (!page)
+		return 0;
+
+	return (struct thread_info *)page_address(page);
+}
+
+/*
+ * Free a thread_info node, and all of its derivative
+ * data structures.
+ */
+void free_thread_info(struct thread_info *info)
+{
+	struct single_step_state *step_state = info->step_state;
+
+
+	if (step_state) {
+
+		/*
+		 * FIXME: we don't munmap step_state->buffer
+		 * because the mm_struct for this process (info->task->mm)
+		 * has already been zeroed in exit_mm().  Keeping a
+		 * reference to it here seems like a bad move, so this
+		 * means we can't munmap() the buffer, and therefore if we
+		 * ptrace multiple threads in a process, we will slowly
+		 * leak user memory.  (Note that as soon as the last
+		 * thread in a process dies, we will reclaim all user
+		 * memory including single-step buffers in the usual way.)
+		 * We should either assign a kernel VA to this buffer
+		 * somehow, or we should associate the buffer(s) with the
+		 * mm itself so we can clean them up that way.
+		 */
+		kfree(step_state);
+	}
+
+	free_page((unsigned long)info);
+}
+
+static void save_arch_state(struct thread_struct *t);
+
+extern void ret_from_fork(void);
+
+int copy_thread(unsigned long clone_flags, unsigned long sp,
+		unsigned long stack_size,
+		struct task_struct *p, struct pt_regs *regs)
+{
+	struct pt_regs *childregs;
+	unsigned long ksp;
+
+	/*
+	 * When creating a new kernel thread we pass sp as zero.
+	 * Assign it to a reasonable value now that we have the stack.
+	 */
+	if (sp == 0 && regs->ex1 == PL_ICS_EX1(KERNEL_PL, 0))
+		sp = KSTK_TOP(p);
+
+	/*
+	 * Do not clone step state from the parent; each thread
+	 * must make its own lazily.
+	 */
+	task_thread_info(p)->step_state = NULL;
+
+	/*
+	 * Start new thread in ret_from_fork so it schedules properly
+	 * and then return from interrupt like the parent.
+	 */
+	p->thread.pc = (unsigned long) ret_from_fork;
+
+	/* Save user stack top pointer so we can ID the stack vm area later. */
+	p->thread.usp0 = sp;
+
+	/* Record the pid of the process that created this one. */
+	p->thread.creator_pid = current->pid;
+
+	/*
+	 * Copy the registers onto the kernel stack so the
+	 * return-from-interrupt code will reload it into registers.
+	 */
+	childregs = task_pt_regs(p);
+	*childregs = *regs;
+	childregs->regs[0] = 0;         /* return value is zero */
+	childregs->sp = sp;  /* override with new user stack pointer */
+
+	/*
+	 * Copy the callee-saved registers from the passed pt_regs struct
+	 * into the context-switch callee-saved registers area.
+	 * We have to restore the callee-saved registers since we may
+	 * be cloning a userspace task with userspace register state,
+	 * and we won't be unwinding the same kernel frames to restore them.
+	 * Zero out the C ABI save area to mark the top of the stack.
+	 */
+	ksp = (unsigned long) childregs;
+	ksp -= C_ABI_SAVE_AREA_SIZE;   /* interrupt-entry save area */
+	((long *)ksp)[0] = ((long *)ksp)[1] = 0;
+	ksp -= CALLEE_SAVED_REGS_COUNT * sizeof(unsigned long);
+	memcpy((void *)ksp, &regs->regs[CALLEE_SAVED_FIRST_REG],
+	       CALLEE_SAVED_REGS_COUNT * sizeof(unsigned long));
+	ksp -= C_ABI_SAVE_AREA_SIZE;   /* __switch_to() save area */
+	((long *)ksp)[0] = ((long *)ksp)[1] = 0;
+	p->thread.ksp = ksp;
+
+#if CHIP_HAS_TILE_DMA()
+	/*
+	 * No DMA in the new thread.  We model this on the fact that
+	 * fork() clears the pending signals, alarms, and aio for the child.
+	 */
+	memset(&p->thread.tile_dma_state, 0, sizeof(struct tile_dma_state));
+	memset(&p->thread.dma_async_tlb, 0, sizeof(struct async_tlb));
+#endif
+
+#if CHIP_HAS_SN_PROC()
+	/* Likewise, the new thread is not running static processor code. */
+	p->thread.sn_proc_running = 0;
+	memset(&p->thread.sn_async_tlb, 0, sizeof(struct async_tlb));
+#endif
+
+#if CHIP_HAS_PROC_STATUS_SPR()
+	/* New thread has its miscellaneous processor state bits clear. */
+	p->thread.proc_status = 0;
+#endif
+
+
+
+	/*
+	 * Start the new thread with the current architecture state
+	 * (user interrupt masks, etc.).
+	 */
+	save_arch_state(&p->thread);
+
+	return 0;
+}
+
+/*
+ * Return "current" if it looks plausible, or else a pointer to a dummy.
+ * This can be helpful if we are just trying to emit a clean panic.
+ */
+struct task_struct *validate_current(void)
+{
+	static struct task_struct corrupt = { .comm = "<corrupt>" };
+	struct task_struct *tsk = current;
+	if (unlikely((unsigned long)tsk < PAGE_OFFSET ||
+		     (void *)tsk > high_memory ||
+		     ((unsigned long)tsk & (__alignof__(*tsk) - 1)) != 0)) {
+		printk("Corrupt 'current' %p (sp %#lx)\n", tsk, stack_pointer);
+		tsk = &corrupt;
+	}
+	return tsk;
+}
+
+/* Take and return the pointer to the previous task, for schedule_tail(). */
+struct task_struct *sim_notify_fork(struct task_struct *prev)
+{
+	struct task_struct *tsk = current;
+	__insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_OS_FORK_PARENT |
+		     (tsk->thread.creator_pid << _SIM_CONTROL_OPERATOR_BITS));
+	__insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_OS_FORK |
+		     (tsk->pid << _SIM_CONTROL_OPERATOR_BITS));
+	return prev;
+}
+
+int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
+{
+	struct pt_regs *ptregs = task_pt_regs(tsk);
+	elf_core_copy_regs(regs, ptregs);
+	return 1;
+}
+
+#if CHIP_HAS_TILE_DMA()
+
+/* Allow user processes to access the DMA SPRs */
+void grant_dma_mpls(void)
+{
+	__insn_mtspr(SPR_MPL_DMA_CPL_SET_0, 1);
+	__insn_mtspr(SPR_MPL_DMA_NOTIFY_SET_0, 1);
+}
+
+/* Forbid user processes from accessing the DMA SPRs */
+void restrict_dma_mpls(void)
+{
+	__insn_mtspr(SPR_MPL_DMA_CPL_SET_1, 1);
+	__insn_mtspr(SPR_MPL_DMA_NOTIFY_SET_1, 1);
+}
+
+/* Pause the DMA engine, then save off its state registers. */
+static void save_tile_dma_state(struct tile_dma_state *dma)
+{
+	unsigned long state = __insn_mfspr(SPR_DMA_USER_STATUS);
+	unsigned long post_suspend_state;
+
+	/* If we're running, suspend the engine. */
+	if ((state & DMA_STATUS_MASK) == SPR_DMA_STATUS__RUNNING_MASK)
+		__insn_mtspr(SPR_DMA_CTR, SPR_DMA_CTR__SUSPEND_MASK);
+
+	/*
+	 * Wait for the engine to idle, then save regs.  Note that we
+	 * want to record the "running" bit from before suspension,
+	 * and the "done" bit from after, so that we can properly
+	 * distinguish a case where the user suspended the engine from
+	 * the case where the kernel suspended as part of the context
+	 * swap.
+	 */
+	do {
+		post_suspend_state = __insn_mfspr(SPR_DMA_USER_STATUS);
+	} while (post_suspend_state & SPR_DMA_STATUS__BUSY_MASK);
+
+	dma->src = __insn_mfspr(SPR_DMA_SRC_ADDR);
+	dma->src_chunk = __insn_mfspr(SPR_DMA_SRC_CHUNK_ADDR);
+	dma->dest = __insn_mfspr(SPR_DMA_DST_ADDR);
+	dma->dest_chunk = __insn_mfspr(SPR_DMA_DST_CHUNK_ADDR);
+	dma->strides = __insn_mfspr(SPR_DMA_STRIDE);
+	dma->chunk_size = __insn_mfspr(SPR_DMA_CHUNK_SIZE);
+	dma->byte = __insn_mfspr(SPR_DMA_BYTE);
+	dma->status = (state & SPR_DMA_STATUS__RUNNING_MASK) |
+		(post_suspend_state & SPR_DMA_STATUS__DONE_MASK);
+}
+
+/* Restart a DMA that was running before we were context-switched out. */
+static void restore_tile_dma_state(struct thread_struct *t)
+{
+	const struct tile_dma_state *dma = &t->tile_dma_state;
+
+	/*
+	 * The only way to restore the done bit is to run a zero
+	 * length transaction.
+	 */
+	if ((dma->status & SPR_DMA_STATUS__DONE_MASK) &&
+	    !(__insn_mfspr(SPR_DMA_USER_STATUS) & SPR_DMA_STATUS__DONE_MASK)) {
+		__insn_mtspr(SPR_DMA_BYTE, 0);
+		__insn_mtspr(SPR_DMA_CTR, SPR_DMA_CTR__REQUEST_MASK);
+		while (__insn_mfspr(SPR_DMA_USER_STATUS) &
+		       SPR_DMA_STATUS__BUSY_MASK)
+			;
+	}
+
+	__insn_mtspr(SPR_DMA_SRC_ADDR, dma->src);
+	__insn_mtspr(SPR_DMA_SRC_CHUNK_ADDR, dma->src_chunk);
+	__insn_mtspr(SPR_DMA_DST_ADDR, dma->dest);
+	__insn_mtspr(SPR_DMA_DST_CHUNK_ADDR, dma->dest_chunk);
+	__insn_mtspr(SPR_DMA_STRIDE, dma->strides);
+	__insn_mtspr(SPR_DMA_CHUNK_SIZE, dma->chunk_size);
+	__insn_mtspr(SPR_DMA_BYTE, dma->byte);
+
+	/*
+	 * Restart the engine if we were running and not done.
+	 * Clear a pending async DMA fault that we were waiting on return
+	 * to user space to execute, since we expect the DMA engine
+	 * to regenerate those faults for us now.  Note that we don't
+	 * try to clear the TIF_ASYNC_TLB flag, since it's relatively
+	 * harmless if set, and it covers both DMA and the SN processor.
+	 */
+	if ((dma->status & DMA_STATUS_MASK) == SPR_DMA_STATUS__RUNNING_MASK) {
+		t->dma_async_tlb.fault_num = 0;
+		__insn_mtspr(SPR_DMA_CTR, SPR_DMA_CTR__REQUEST_MASK);
+	}
+}
+
+#endif
+
+static void save_arch_state(struct thread_struct *t)
+{
+#if CHIP_HAS_SPLIT_INTR_MASK()
+	t->interrupt_mask = __insn_mfspr(SPR_INTERRUPT_MASK_0_0) |
+		((u64)__insn_mfspr(SPR_INTERRUPT_MASK_0_1) << 32);
+#else
+	t->interrupt_mask = __insn_mfspr(SPR_INTERRUPT_MASK_0);
+#endif
+	t->ex_context[0] = __insn_mfspr(SPR_EX_CONTEXT_0_0);
+	t->ex_context[1] = __insn_mfspr(SPR_EX_CONTEXT_0_1);
+	t->system_save[0] = __insn_mfspr(SPR_SYSTEM_SAVE_0_0);
+	t->system_save[1] = __insn_mfspr(SPR_SYSTEM_SAVE_0_1);
+	t->system_save[2] = __insn_mfspr(SPR_SYSTEM_SAVE_0_2);
+	t->system_save[3] = __insn_mfspr(SPR_SYSTEM_SAVE_0_3);
+	t->intctrl_0 = __insn_mfspr(SPR_INTCTRL_0_STATUS);
+#if CHIP_HAS_PROC_STATUS_SPR()
+	t->proc_status = __insn_mfspr(SPR_PROC_STATUS);
+#endif
+}
+
+static void restore_arch_state(const struct thread_struct *t)
+{
+#if CHIP_HAS_SPLIT_INTR_MASK()
+	__insn_mtspr(SPR_INTERRUPT_MASK_0_0, (u32) t->interrupt_mask);
+	__insn_mtspr(SPR_INTERRUPT_MASK_0_1, t->interrupt_mask >> 32);
+#else
+	__insn_mtspr(SPR_INTERRUPT_MASK_0, t->interrupt_mask);
+#endif
+	__insn_mtspr(SPR_EX_CONTEXT_0_0, t->ex_context[0]);
+	__insn_mtspr(SPR_EX_CONTEXT_0_1, t->ex_context[1]);
+	__insn_mtspr(SPR_SYSTEM_SAVE_0_0, t->system_save[0]);
+	__insn_mtspr(SPR_SYSTEM_SAVE_0_1, t->system_save[1]);
+	__insn_mtspr(SPR_SYSTEM_SAVE_0_2, t->system_save[2]);
+	__insn_mtspr(SPR_SYSTEM_SAVE_0_3, t->system_save[3]);
+	__insn_mtspr(SPR_INTCTRL_0_STATUS, t->intctrl_0);
+#if CHIP_HAS_PROC_STATUS_SPR()
+	__insn_mtspr(SPR_PROC_STATUS, t->proc_status);
+#endif
+#if CHIP_HAS_TILE_RTF_HWM()
+	/*
+	 * Clear this whenever we switch back to a process in case
+	 * the previous process was monkeying with it.  Even if enabled
+	 * in CBOX_MSR1 via TILE_RTF_HWM_MIN, it's still just a
+	 * performance hint, so isn't worth a full save/restore.
+	 */
+	__insn_mtspr(SPR_TILE_RTF_HWM, 0);
+#endif
+}
+
+
+void _prepare_arch_switch(struct task_struct *next)
+{
+#if CHIP_HAS_SN_PROC()
+	int snctl;
+#endif
+#if CHIP_HAS_TILE_DMA()
+	struct tile_dma_state *dma = &current->thread.tile_dma_state;
+	if (dma->enabled)
+		save_tile_dma_state(dma);
+#endif
+#if CHIP_HAS_SN_PROC()
+	/*
+	 * Suspend the static network processor if it was running.
+	 * We do not suspend the fabric itself, just like we don't
+	 * try to suspend the UDN.
+	 */
+	snctl = __insn_mfspr(SPR_SNCTL);
+	current->thread.sn_proc_running =
+		(snctl & SPR_SNCTL__FRZPROC_MASK) == 0;
+	if (current->thread.sn_proc_running)
+		__insn_mtspr(SPR_SNCTL, snctl | SPR_SNCTL__FRZPROC_MASK);
+#endif
+}
+
+
+extern struct task_struct *__switch_to(struct task_struct *prev,
+				       struct task_struct *next,
+				       unsigned long new_system_save_1_0);
+
+struct task_struct *__sched _switch_to(struct task_struct *prev,
+				       struct task_struct *next)
+{
+	/* DMA state is already saved; save off other arch state. */
+	save_arch_state(&prev->thread);
+
+#if CHIP_HAS_TILE_DMA()
+	/*
+	 * Restore DMA in new task if desired.
+	 * Note that it is only safe to restart here since interrupts
+	 * are disabled, so we can't take any DMATLB miss or access
+	 * interrupts before we have finished switching stacks.
+	 */
+	if (next->thread.tile_dma_state.enabled) {
+		restore_tile_dma_state(&next->thread);
+		grant_dma_mpls();
+	} else {
+		restrict_dma_mpls();
+	}
+#endif
+
+	/* Restore other arch state. */
+	restore_arch_state(&next->thread);
+
+#if CHIP_HAS_SN_PROC()
+	/*
+	 * Restart static network processor in the new process
+	 * if it was running before.
+	 */
+	if (next->thread.sn_proc_running) {
+		int snctl = __insn_mfspr(SPR_SNCTL);
+		__insn_mtspr(SPR_SNCTL, snctl & ~SPR_SNCTL__FRZPROC_MASK);
+	}
+#endif
+
+
+	/*
+	 * Switch kernel SP, PC, and callee-saved registers.
+	 * In the context of the new task, return the old task pointer
+	 * (i.e. the task that actually called __switch_to).
+	 * Pass the value to use for SYSTEM_SAVE_1_0 when we reset our sp.
+	 */
+	return __switch_to(prev, next, next_current_ksp0(next));
+}
+
+int _sys_fork(struct pt_regs *regs)
+{
+	return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
+}
+
+int _sys_clone(unsigned long clone_flags, unsigned long newsp,
+	       int __user *parent_tidptr, int __user *child_tidptr,
+	       struct pt_regs *regs)
+{
+	if (!newsp)
+		newsp = regs->sp;
+	return do_fork(clone_flags, newsp, regs, 0,
+		       parent_tidptr, child_tidptr);
+}
+
+int _sys_vfork(struct pt_regs *regs)
+{
+	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp,
+		       regs, 0, NULL, NULL);
+}
+
+/*
+ * sys_execve() executes a new program.
+ */
+int _sys_execve(char __user *path, char __user *__user *argv,
+		char __user *__user *envp, struct pt_regs *regs)
+{
+	int error;
+	char *filename;
+
+	filename = getname(path);
+	error = PTR_ERR(filename);
+	if (IS_ERR(filename))
+		goto out;
+	error = do_execve(filename, argv, envp, regs);
+	putname(filename);
+out:
+	return error;
+}
+
+#ifdef CONFIG_COMPAT
+int _compat_sys_execve(char __user *path, compat_uptr_t __user *argv,
+		       compat_uptr_t __user *envp, struct pt_regs *regs)
+{
+	int error;
+	char *filename;
+
+	filename = getname(path);
+	error = PTR_ERR(filename);
+	if (IS_ERR(filename))
+		goto out;
+	error = compat_do_execve(filename, argv, envp, regs);
+	putname(filename);
+out:
+	return error;
+}
+#endif
+
+unsigned long get_wchan(struct task_struct *p)
+{
+	struct KBacktraceIterator kbt;
+
+	if (!p || p == current || p->state == TASK_RUNNING)
+		return 0;
+
+	for (KBacktraceIterator_init(&kbt, p, NULL);
+	     !KBacktraceIterator_end(&kbt);
+	     KBacktraceIterator_next(&kbt)) {
+		if (!in_sched_functions(kbt.it.pc))
+			return kbt.it.pc;
+	}
+
+	return 0;
+}
+
+/*
+ * We pass in lr as zero (cleared in kernel_thread) and the caller
+ * part of the backtrace ABI on the stack also zeroed (in copy_thread)
+ * so that backtraces will stop with this function.
+ * Note that we don't use r0, since copy_thread() clears it.
+ */
+static void start_kernel_thread(int dummy, int (*fn)(int), int arg)
+{
+	do_exit(fn(arg));
+}
+
+/*
+ * Create a kernel thread
+ */
+int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
+{
+	struct pt_regs regs;
+
+	memset(&regs, 0, sizeof(regs));
+	regs.ex1 = PL_ICS_EX1(KERNEL_PL, 0);  /* run at kernel PL, no ICS */
+	regs.pc = (long) start_kernel_thread;
+	regs.flags = PT_FLAGS_CALLER_SAVES;   /* need to restore r1 and r2 */
+	regs.regs[1] = (long) fn;             /* function pointer */
+	regs.regs[2] = (long) arg;            /* parameter register */
+
+	/* Ok, create the new process.. */
+	return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs,
+		       0, NULL, NULL);
+}
+EXPORT_SYMBOL(kernel_thread);
+
+/* Flush thread state. */
+void flush_thread(void)
+{
+	/* Nothing */
+}
+
+/*
+ * Free current thread data structures etc..
+ */
+void exit_thread(void)
+{
+	/* Nothing */
+}
+
+#ifdef __tilegx__
+# define LINECOUNT 3
+# define EXTRA_NL "\n"
+#else
+# define LINECOUNT 4
+# define EXTRA_NL ""
+#endif
+
+void show_regs(struct pt_regs *regs)
+{
+	struct task_struct *tsk = validate_current();
+	int i, linebreak;
+	printk("\n");
+	printk(" Pid: %d, comm: %20s, CPU: %d\n",
+	       tsk->pid, tsk->comm, smp_processor_id());
+	for (i = linebreak = 0; i < 53; ++i) {
+		printk(" r%-2d: "REGFMT, i, regs->regs[i]);
+		if (++linebreak == LINECOUNT) {
+			linebreak = 0;
+			printk("\n");
+		}
+	}
+	printk(" tp : "REGFMT EXTRA_NL " sp : "REGFMT" lr : "REGFMT"\n",
+	       regs->tp, regs->sp, regs->lr);
+	printk(" pc : "REGFMT" ex1: %ld     faultnum: %ld\n",
+	       regs->pc, regs->ex1, regs->faultnum);
+
+	dump_stack_regs(regs);
+}
diff --git a/arch/tile/kernel/ptrace.c b/arch/tile/kernel/ptrace.c
new file mode 100644
index 0000000..4680549
--- /dev/null
+++ b/arch/tile/kernel/ptrace.c
@@ -0,0 +1,203 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ * Copied from i386: Ross Biro 1/23/92
+ */
+
+#include <linux/kernel.h>
+#include <linux/ptrace.h>
+#include <linux/kprobes.h>
+#include <linux/compat.h>
+#include <linux/uaccess.h>
+
+void user_enable_single_step(struct task_struct *child)
+{
+	set_tsk_thread_flag(child, TIF_SINGLESTEP);
+}
+
+void user_disable_single_step(struct task_struct *child)
+{
+	clear_tsk_thread_flag(child, TIF_SINGLESTEP);
+}
+
+/*
+ * This routine will put a word on the process's privileged stack.
+ */
+static void putreg(struct task_struct *task,
+		   unsigned long addr, unsigned long value)
+{
+	unsigned int regno = addr / sizeof(unsigned long);
+	struct pt_regs *childregs = task_pt_regs(task);
+	childregs->regs[regno] = value;
+	childregs->flags |= PT_FLAGS_RESTORE_REGS;
+}
+
+static unsigned long getreg(struct task_struct *task, unsigned long addr)
+{
+	unsigned int regno = addr / sizeof(unsigned long);
+	struct pt_regs *childregs = task_pt_regs(task);
+	return childregs->regs[regno];
+}
+
+/*
+ * Called by kernel/ptrace.c when detaching..
+ */
+void ptrace_disable(struct task_struct *child)
+{
+	clear_tsk_thread_flag(child, TIF_SINGLESTEP);
+
+	/*
+	 * These two are currently unused, but will be set by arch_ptrace()
+	 * and used in the syscall assembly when we do support them.
+	 */
+	clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+}
+
+long arch_ptrace(struct task_struct *child, long request, long addr, long data)
+{
+	unsigned long __user *datap;
+	unsigned long tmp;
+	int i;
+	long ret = -EIO;
+
+#ifdef CONFIG_COMPAT
+	if (task_thread_info(current)->status & TS_COMPAT)
+		data = (u32)data;
+	if (task_thread_info(child)->status & TS_COMPAT)
+		addr = (u32)addr;
+#endif
+	datap = (unsigned long __user *)data;
+
+	switch (request) {
+
+	case PTRACE_PEEKUSR:  /* Read register from pt_regs. */
+		if (addr & (sizeof(data)-1))
+			break;
+		if (addr < 0 || addr >= PTREGS_SIZE)
+			break;
+		tmp = getreg(child, addr);   /* Read register */
+		ret = put_user(tmp, datap);
+		break;
+
+	case PTRACE_POKEUSR:  /* Write register in pt_regs. */
+		if (addr & (sizeof(data)-1))
+			break;
+		if (addr < 0 || addr >= PTREGS_SIZE)
+			break;
+		putreg(child, addr, data);   /* Write register */
+		break;
+
+	case PTRACE_GETREGS:  /* Get all registers from the child. */
+		if (!access_ok(VERIFY_WRITE, datap, PTREGS_SIZE))
+			break;
+		for (i = 0; i < PTREGS_SIZE; i += sizeof(long)) {
+			ret = __put_user(getreg(child, i), datap);
+			if (ret != 0)
+				break;
+			datap++;
+		}
+		break;
+
+	case PTRACE_SETREGS:  /* Set all registers in the child. */
+		if (!access_ok(VERIFY_READ, datap, PTREGS_SIZE))
+			break;
+		for (i = 0; i < PTREGS_SIZE; i += sizeof(long)) {
+			ret = __get_user(tmp, datap);
+			if (ret != 0)
+				break;
+			putreg(child, i, tmp);
+			datap++;
+		}
+		break;
+
+	case PTRACE_GETFPREGS:  /* Get the child FPU state. */
+	case PTRACE_SETFPREGS:  /* Set the child FPU state. */
+		break;
+
+	case PTRACE_SETOPTIONS:
+		/* Support TILE-specific ptrace options. */
+		child->ptrace &= ~PT_TRACE_MASK_TILE;
+		tmp = data & PTRACE_O_MASK_TILE;
+		data &= ~PTRACE_O_MASK_TILE;
+		ret = ptrace_request(child, request, addr, data);
+		if (tmp & PTRACE_O_TRACEMIGRATE)
+			child->ptrace |= PT_TRACE_MIGRATE;
+		break;
+
+	default:
+#ifdef CONFIG_COMPAT
+		if (task_thread_info(current)->status & TS_COMPAT) {
+			ret = compat_ptrace_request(child, request,
+						    addr, data);
+			break;
+		}
+#endif
+		ret = ptrace_request(child, request, addr, data);
+		break;
+	}
+
+	return ret;
+}
+
+#ifdef CONFIG_COMPAT
+/* Not used; we handle compat issues in arch_ptrace() directly. */
+long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
+			       compat_ulong_t addr, compat_ulong_t data)
+{
+	BUG();
+}
+#endif
+
+void do_syscall_trace(void)
+{
+	if (!test_thread_flag(TIF_SYSCALL_TRACE))
+		return;
+
+	if (!(current->ptrace & PT_PTRACED))
+		return;
+
+	/*
+	 * The 0x80 provides a way for the tracing parent to distinguish
+	 * between a syscall stop and SIGTRAP delivery
+	 */
+	ptrace_notify(SIGTRAP|((current->ptrace & PT_TRACESYSGOOD) ? 0x80 : 0));
+
+	/*
+	 * this isn't the same as continuing with a signal, but it will do
+	 * for normal use.  strace only continues with a signal if the
+	 * stopping signal is not SIGTRAP.  -brl
+	 */
+	if (current->exit_code) {
+		send_sig(current->exit_code, current, 1);
+		current->exit_code = 0;
+	}
+}
+
+void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code)
+{
+	struct siginfo info;
+
+	memset(&info, 0, sizeof(info));
+	info.si_signo = SIGTRAP;
+	info.si_code  = TRAP_BRKPT;
+	info.si_addr  = (void __user *) regs->pc;
+
+	/* Send us the fakey SIGTRAP */
+	force_sig_info(SIGTRAP, &info, tsk);
+}
+
+/* Handle synthetic interrupt delivered only by the simulator. */
+void __kprobes do_breakpoint(struct pt_regs* regs, int fault_num)
+{
+	send_sigtrap(current, regs, fault_num);
+}
diff --git a/arch/tile/kernel/reboot.c b/arch/tile/kernel/reboot.c
new file mode 100644
index 0000000..a452392
--- /dev/null
+++ b/arch/tile/kernel/reboot.c
@@ -0,0 +1,52 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#include <linux/stddef.h>
+#include <linux/reboot.h>
+#include <linux/smp.h>
+#include <asm/page.h>
+#include <asm/setup.h>
+#include <hv/hypervisor.h>
+
+#ifndef CONFIG_SMP
+#define smp_send_stop()
+#endif
+
+void machine_halt(void)
+{
+	warn_early_printk();
+	raw_local_irq_disable_all();
+	smp_send_stop();
+	hv_halt();
+}
+
+void machine_power_off(void)
+{
+	warn_early_printk();
+	raw_local_irq_disable_all();
+	smp_send_stop();
+	hv_power_off();
+}
+
+void machine_restart(char *cmd)
+{
+	raw_local_irq_disable_all();
+	smp_send_stop();
+	hv_restart((HV_VirtAddr) "vmlinux", (HV_VirtAddr) cmd);
+}
+
+/*
+ * Power off function, if any
+ */
+void (*pm_power_off)(void) = machine_power_off;
diff --git a/arch/tile/kernel/regs_32.S b/arch/tile/kernel/regs_32.S
new file mode 100644
index 0000000..e88d6e12
--- /dev/null
+++ b/arch/tile/kernel/regs_32.S
@@ -0,0 +1,145 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#include <linux/linkage.h>
+#include <asm/system.h>
+#include <asm/ptrace.h>
+#include <asm/asm-offsets.h>
+#include <arch/spr_def.h>
+#include <asm/processor.h>
+
+/*
+ * See <asm/system.h>; called with prev and next task_struct pointers.
+ * "prev" is returned in r0 for _switch_to and also for ret_from_fork.
+ *
+ * We want to save pc/sp in "prev", and get the new pc/sp from "next".
+ * We also need to save all the callee-saved registers on the stack.
+ *
+ * Intel enables/disables access to the hardware cycle counter in
+ * seccomp (secure computing) environments if necessary, based on
+ * has_secure_computing().  We might want to do this at some point,
+ * though it would require virtualizing the other SPRs under WORLD_ACCESS.
+ *
+ * Since we're saving to the stack, we omit sp from this list.
+ * And for parallels with other architectures, we save lr separately,
+ * in the thread_struct itself (as the "pc" field).
+ *
+ * This code also needs to be aligned with process.c copy_thread()
+ */
+
+#if CALLEE_SAVED_REGS_COUNT != 24
+# error Mismatch between <asm/system.h> and kernel/entry.S
+#endif
+#define FRAME_SIZE ((2 + CALLEE_SAVED_REGS_COUNT) * 4)
+
+#define SAVE_REG(r) { sw r12, r; addi r12, r12, 4 }
+#define LOAD_REG(r) { lw r, r12; addi r12, r12, 4 }
+#define FOR_EACH_CALLEE_SAVED_REG(f)					\
+							f(r30); f(r31); \
+	f(r32); f(r33); f(r34); f(r35);	f(r36); f(r37); f(r38); f(r39); \
+	f(r40); f(r41); f(r42); f(r43); f(r44); f(r45); f(r46); f(r47); \
+	f(r48); f(r49); f(r50); f(r51); f(r52);
+
+STD_ENTRY_SECTION(__switch_to, .sched.text)
+	{
+	  move r10, sp
+	  sw sp, lr
+	  addi sp, sp, -FRAME_SIZE
+	}
+	{
+	  addi r11, sp, 4
+	  addi r12, sp, 8
+	}
+	{
+	  sw r11, r10
+	  addli r4, r1, TASK_STRUCT_THREAD_KSP_OFFSET
+	}
+	{
+	  lw r13, r4   /* Load new sp to a temp register early. */
+	  addli r3, r0, TASK_STRUCT_THREAD_KSP_OFFSET
+	}
+	FOR_EACH_CALLEE_SAVED_REG(SAVE_REG)
+	{
+	  sw r3, sp
+	  addli r3, r0, TASK_STRUCT_THREAD_PC_OFFSET
+	}
+	{
+	  sw r3, lr
+	  addli r4, r1, TASK_STRUCT_THREAD_PC_OFFSET
+	}
+	{
+	  lw lr, r4
+	  addi r12, r13, 8
+	}
+	{
+	  /* Update sp and ksp0 simultaneously to avoid backtracer warnings. */
+	  move sp, r13
+	  mtspr SYSTEM_SAVE_1_0, r2
+	}
+	FOR_EACH_CALLEE_SAVED_REG(LOAD_REG)
+.L__switch_to_pc:
+	{
+	  addi sp, sp, FRAME_SIZE
+	  jrp lr   /* r0 is still valid here, so return it */
+	}
+	STD_ENDPROC(__switch_to)
+
+/* Return a suitable address for the backtracer for suspended threads */
+STD_ENTRY_SECTION(get_switch_to_pc, .sched.text)
+	lnk r0
+	{
+	  addli r0, r0, .L__switch_to_pc - .
+	  jrp lr
+	}
+	STD_ENDPROC(get_switch_to_pc)
+
+STD_ENTRY(get_pt_regs)
+	.irp reg, r0, r1, r2, r3, r4, r5, r6, r7, \
+		 r8, r9, r10, r11, r12, r13, r14, r15, \
+		 r16, r17, r18, r19, r20, r21, r22, r23, \
+		 r24, r25, r26, r27, r28, r29, r30, r31, \
+		 r32, r33, r34, r35, r36, r37, r38, r39, \
+		 r40, r41, r42, r43, r44, r45, r46, r47, \
+		 r48, r49, r50, r51, r52, tp, sp
+	{
+	 sw r0, \reg
+	 addi r0, r0, 4
+	}
+	.endr
+	{
+	 sw r0, lr
+	 addi r0, r0, PTREGS_OFFSET_PC - PTREGS_OFFSET_LR
+	}
+	lnk r1
+	{
+	 sw r0, r1
+	 addi r0, r0, PTREGS_OFFSET_EX1 - PTREGS_OFFSET_PC
+	}
+	mfspr r1, INTERRUPT_CRITICAL_SECTION
+	shli r1, r1, SPR_EX_CONTEXT_1_1__ICS_SHIFT
+	ori r1, r1, KERNEL_PL
+	{
+	 sw r0, r1
+	 addi r0, r0, PTREGS_OFFSET_FAULTNUM - PTREGS_OFFSET_EX1
+	}
+	{
+	 sw r0, zero       /* clear faultnum */
+	 addi r0, r0, PTREGS_OFFSET_ORIG_R0 - PTREGS_OFFSET_FAULTNUM
+	}
+	{
+	 sw r0, zero       /* clear orig_r0 */
+	 addli r0, r0, -PTREGS_OFFSET_ORIG_R0    /* restore r0 to base */
+	}
+	jrp lr
+	STD_ENDPROC(get_pt_regs)
diff --git a/arch/tile/kernel/relocate_kernel.S b/arch/tile/kernel/relocate_kernel.S
new file mode 100644
index 0000000..010b418
--- /dev/null
+++ b/arch/tile/kernel/relocate_kernel.S
@@ -0,0 +1,280 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ * copy new kernel into place and then call hv_reexec
+ *
+ */
+
+#include <linux/linkage.h>
+#include <arch/chip.h>
+#include <asm/page.h>
+#include <hv/hypervisor.h>
+
+#define ___hvb	MEM_SV_INTRPT + HV_GLUE_START_CPA
+
+#define ___hv_dispatch(f) (___hvb + (HV_DISPATCH_ENTRY_SIZE * f))
+
+#define ___hv_console_putc ___hv_dispatch(HV_DISPATCH_CONSOLE_PUTC)
+#define ___hv_halt         ___hv_dispatch(HV_DISPATCH_HALT)
+#define ___hv_reexec       ___hv_dispatch(HV_DISPATCH_REEXEC)
+#define ___hv_flush_remote ___hv_dispatch(HV_DISPATCH_FLUSH_REMOTE)
+
+#undef RELOCATE_NEW_KERNEL_VERBOSE
+
+STD_ENTRY(relocate_new_kernel)
+
+	move	r30, r0		/* page list */
+	move	r31, r1		/* address of page we are on */
+	move	r32, r2		/* start address of new kernel */
+
+	shri	r1, r1, PAGE_SHIFT
+	addi	r1, r1, 1
+	shli	sp, r1, PAGE_SHIFT
+	addi	sp, sp, -8
+	/* we now have a stack (whether we need one or not) */
+
+	moveli	r40, lo16(___hv_console_putc)
+	auli	r40, r40, ha16(___hv_console_putc)
+
+#ifdef RELOCATE_NEW_KERNEL_VERBOSE
+	moveli	r0, 'r'
+	jalr	r40
+
+	moveli	r0, '_'
+	jalr	r40
+
+	moveli	r0, 'n'
+	jalr	r40
+
+	moveli	r0, '_'
+	jalr	r40
+
+	moveli	r0, 'k'
+	jalr	r40
+
+	moveli	r0, '\n'
+	jalr	r40
+#endif
+
+	/*
+	 * Throughout this code r30 is pointer to the element of page
+	 * list we are working on.
+	 *
+	 * Normally we get to the next element of the page list by
+	 * incrementing r30 by four.  The exception is if the element
+	 * on the page list is an IND_INDIRECTION in which case we use
+	 * the element with the low bits masked off as the new value
+	 * of r30.
+	 *
+	 * To get this started, we need the value passed to us (which
+	 * will always be an IND_INDIRECTION) in memory somewhere with
+	 * r30 pointing at it.  To do that, we push the value passed
+	 * to us on the stack and make r30 point to it.
+	 */
+
+	sw	sp, r30
+	move	r30, sp
+	addi	sp, sp, -8
+
+#if CHIP_HAS_CBOX_HOME_MAP()
+	/*
+	 * On TILEPro, we need to flush all tiles' caches, since we may
+	 * have been doing hash-for-home caching there.  Note that we
+	 * must do this _after_ we're completely done modifying any memory
+	 * other than our output buffer (which we know is locally cached).
+	 * We want the caches to be fully clean when we do the reexec,
+	 * because the hypervisor is going to do this flush again at that
+	 * point, and we don't want that second flush to overwrite any memory.
+	 */
+	{
+	 move	r0, zero	 /* cache_pa */
+	 move	r1, zero
+	}
+	{
+	 auli	r2, zero, ha16(HV_FLUSH_EVICT_L2) /* cache_control */
+	 movei	r3, -1		 /* cache_cpumask; -1 means all client tiles */
+	}
+	{
+	 move	r4, zero	 /* tlb_va */
+	 move	r5, zero	 /* tlb_length */
+	}
+	{
+	 move	r6, zero	 /* tlb_pgsize */
+	 move	r7, zero	 /* tlb_cpumask */
+	}
+	{
+	 move	r8, zero	 /* asids */
+	 moveli	r20, lo16(___hv_flush_remote)
+	}
+	{
+	 move	r9, zero	 /* asidcount */
+	 auli	r20, r20, ha16(___hv_flush_remote)
+	}
+
+	jalr	r20
+#endif
+
+	/* r33 is destination pointer, default to zero */
+
+	moveli	r33, 0
+
+.Lloop:	lw	r10, r30
+
+	andi	r9, r10, 0xf	/* low 4 bits tell us what type it is */
+	xor	r10, r10, r9	/* r10 is now value with low 4 bits stripped */
+
+	seqi	r0, r9, 0x1	/* IND_DESTINATION */
+	bzt	r0, .Ltry2
+
+	move	r33, r10
+
+#ifdef RELOCATE_NEW_KERNEL_VERBOSE
+	moveli	r0, 'd'
+	jalr	r40
+#endif
+
+	addi	r30, r30, 4
+	j	.Lloop
+
+.Ltry2:
+	seqi	r0, r9, 0x2	/* IND_INDIRECTION */
+	bzt	r0, .Ltry4
+
+	move	r30, r10
+
+#ifdef RELOCATE_NEW_KERNEL_VERBOSE
+	moveli	r0, 'i'
+	jalr	r40
+#endif
+
+	j	.Lloop
+
+.Ltry4:
+	seqi	r0, r9, 0x4	/* IND_DONE */
+	bzt	r0, .Ltry8
+
+	mf
+
+#ifdef RELOCATE_NEW_KERNEL_VERBOSE
+	moveli	r0, 'D'
+	jalr	r40
+	moveli	r0, '\n'
+	jalr	r40
+#endif
+
+	move	r0, r32
+	moveli	r1, 0		/* arg to hv_reexec is 64 bits */
+
+	moveli	r41, lo16(___hv_reexec)
+	auli	r41, r41, ha16(___hv_reexec)
+
+	jalr	r41
+
+	/* we should not get here */
+
+	moveli	r0, '?'
+	jalr	r40
+	moveli	r0, '\n'
+	jalr	r40
+
+	j	.Lhalt
+
+.Ltry8:	seqi	r0, r9, 0x8	/* IND_SOURCE */
+	bz	r0, .Lerr	/* unknown type */
+
+	/* copy page at r10 to page at r33 */
+
+	move	r11, r33
+
+	moveli	r0, lo16(PAGE_SIZE)
+	auli	r0, r0, ha16(PAGE_SIZE)
+	add	r33, r33, r0
+
+	/* copy word at r10 to word at r11 until r11 equals r33 */
+
+	/* We know page size must be multiple of 16, so we can unroll
+	 * 16 times safely without any edge case checking.
+	 *
+	 * Issue a flush of the destination every 16 words to avoid
+	 * incoherence when starting the new kernel.  (Now this is
+	 * just good paranoia because the hv_reexec call will also
+	 * take care of this.)
+	 */
+
+1:
+	{ lw	r0, r10; addi	r10, r10, 4 }
+	{ sw	r11, r0; addi	r11, r11, 4 }
+	{ lw	r0, r10; addi	r10, r10, 4 }
+	{ sw	r11, r0; addi	r11, r11, 4 }
+	{ lw	r0, r10; addi	r10, r10, 4 }
+	{ sw	r11, r0; addi	r11, r11, 4 }
+	{ lw	r0, r10; addi	r10, r10, 4 }
+	{ sw	r11, r0; addi	r11, r11, 4 }
+	{ lw	r0, r10; addi	r10, r10, 4 }
+	{ sw	r11, r0; addi	r11, r11, 4 }
+	{ lw	r0, r10; addi	r10, r10, 4 }
+	{ sw	r11, r0; addi	r11, r11, 4 }
+	{ lw	r0, r10; addi	r10, r10, 4 }
+	{ sw	r11, r0; addi	r11, r11, 4 }
+	{ lw	r0, r10; addi	r10, r10, 4 }
+	{ sw	r11, r0; addi	r11, r11, 4 }
+	{ lw	r0, r10; addi	r10, r10, 4 }
+	{ sw	r11, r0; addi	r11, r11, 4 }
+	{ lw	r0, r10; addi	r10, r10, 4 }
+	{ sw	r11, r0; addi	r11, r11, 4 }
+	{ lw	r0, r10; addi	r10, r10, 4 }
+	{ sw	r11, r0; addi	r11, r11, 4 }
+	{ lw	r0, r10; addi	r10, r10, 4 }
+	{ sw	r11, r0; addi	r11, r11, 4 }
+	{ lw	r0, r10; addi	r10, r10, 4 }
+	{ sw	r11, r0; addi	r11, r11, 4 }
+	{ lw	r0, r10; addi	r10, r10, 4 }
+	{ sw	r11, r0; addi	r11, r11, 4 }
+	{ lw	r0, r10; addi	r10, r10, 4 }
+	{ sw	r11, r0; addi	r11, r11, 4 }
+	{ lw	r0, r10; addi	r10, r10, 4 }
+	{ sw	r11, r0 }
+	{ flush r11    ; addi	r11, r11, 4 }
+
+	seq	r0, r33, r11
+	bzt	r0, 1b
+
+#ifdef RELOCATE_NEW_KERNEL_VERBOSE
+	moveli	r0, 's'
+	jalr	r40
+#endif
+
+	addi	r30, r30, 4
+	j	.Lloop
+
+
+.Lerr:	moveli	r0, 'e'
+	jalr	r40
+	moveli	r0, 'r'
+	jalr	r40
+	moveli	r0, 'r'
+	jalr	r40
+	moveli	r0, '\n'
+	jalr	r40
+.Lhalt:
+	moveli	r41, lo16(___hv_halt)
+	auli	r41, r41, ha16(___hv_halt)
+
+	jalr	r41
+	STD_ENDPROC(relocate_new_kernel)
+
+	.section .rodata,"a"
+
+	.globl relocate_new_kernel_size
+relocate_new_kernel_size:
+	.long .Lend_relocate_new_kernel - relocate_new_kernel
diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c
new file mode 100644
index 0000000..934136b
--- /dev/null
+++ b/arch/tile/kernel/setup.c
@@ -0,0 +1,1497 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mmzone.h>
+#include <linux/bootmem.h>
+#include <linux/module.h>
+#include <linux/node.h>
+#include <linux/cpu.h>
+#include <linux/ioport.h>
+#include <linux/kexec.h>
+#include <linux/pci.h>
+#include <linux/initrd.h>
+#include <linux/io.h>
+#include <linux/highmem.h>
+#include <linux/smp.h>
+#include <linux/timex.h>
+#include <asm/setup.h>
+#include <asm/sections.h>
+#include <asm/sections.h>
+#include <asm/cacheflush.h>
+#include <asm/cacheflush.h>
+#include <asm/pgalloc.h>
+#include <asm/mmu_context.h>
+#include <hv/hypervisor.h>
+#include <arch/interrupts.h>
+
+/* <linux/smp.h> doesn't provide this definition. */
+#ifndef CONFIG_SMP
+#define setup_max_cpus 1
+#endif
+
+static inline int ABS(int x) { return x >= 0 ? x : -x; }
+
+/* Chip information */
+char chip_model[64] __write_once;
+
+struct pglist_data node_data[MAX_NUMNODES] __read_mostly;
+EXPORT_SYMBOL(node_data);
+
+/* We only create bootmem data on node 0. */
+static bootmem_data_t __initdata node0_bdata;
+
+/* Information on the NUMA nodes that we compute early */
+unsigned long __cpuinitdata node_start_pfn[MAX_NUMNODES];
+unsigned long __cpuinitdata node_end_pfn[MAX_NUMNODES];
+unsigned long __initdata node_memmap_pfn[MAX_NUMNODES];
+unsigned long __initdata node_percpu_pfn[MAX_NUMNODES];
+unsigned long __initdata node_free_pfn[MAX_NUMNODES];
+
+#ifdef CONFIG_HIGHMEM
+/* Page frame index of end of lowmem on each controller. */
+unsigned long __cpuinitdata node_lowmem_end_pfn[MAX_NUMNODES];
+
+/* Number of pages that can be mapped into lowmem. */
+static unsigned long __initdata mappable_physpages;
+#endif
+
+/* Data on which physical memory controller corresponds to which NUMA node */
+int node_controller[MAX_NUMNODES] = { [0 ... MAX_NUMNODES-1] = -1 };
+
+#ifdef CONFIG_HIGHMEM
+/* Map information from VAs to PAs */
+unsigned long pbase_map[1 << (32 - HPAGE_SHIFT)]
+  __write_once __attribute__((aligned(L2_CACHE_BYTES)));
+EXPORT_SYMBOL(pbase_map);
+
+/* Map information from PAs to VAs */
+void *vbase_map[NR_PA_HIGHBIT_VALUES]
+  __write_once __attribute__((aligned(L2_CACHE_BYTES)));
+EXPORT_SYMBOL(vbase_map);
+#endif
+
+/* Node number as a function of the high PA bits */
+int highbits_to_node[NR_PA_HIGHBIT_VALUES] __write_once;
+EXPORT_SYMBOL(highbits_to_node);
+
+static unsigned int __initdata maxmem_pfn = -1U;
+static unsigned int __initdata maxnodemem_pfn[MAX_NUMNODES] = {
+	[0 ... MAX_NUMNODES-1] = -1U
+};
+static nodemask_t __initdata isolnodes;
+
+#ifdef CONFIG_PCI
+enum { DEFAULT_PCI_RESERVE_MB = 64 };
+static unsigned int __initdata pci_reserve_mb = DEFAULT_PCI_RESERVE_MB;
+unsigned long __initdata pci_reserve_start_pfn = -1U;
+unsigned long __initdata pci_reserve_end_pfn = -1U;
+#endif
+
+static int __init setup_maxmem(char *str)
+{
+	long maxmem_mb;
+	if (str == NULL || strict_strtol(str, 0, &maxmem_mb) != 0 ||
+	    maxmem_mb == 0)
+		return -EINVAL;
+
+	maxmem_pfn = (maxmem_mb >> (HPAGE_SHIFT - 20)) <<
+		(HPAGE_SHIFT - PAGE_SHIFT);
+	printk("Forcing RAM used to no more than %dMB\n",
+	       maxmem_pfn >> (20 - PAGE_SHIFT));
+	return 0;
+}
+early_param("maxmem", setup_maxmem);
+
+static int __init setup_maxnodemem(char *str)
+{
+	char *endp;
+	long maxnodemem_mb, node;
+
+	node = str ? simple_strtoul(str, &endp, 0) : INT_MAX;
+	if (node >= MAX_NUMNODES || *endp != ':' ||
+	    strict_strtol(endp+1, 0, &maxnodemem_mb) != 0)
+		return -EINVAL;
+
+	maxnodemem_pfn[node] = (maxnodemem_mb >> (HPAGE_SHIFT - 20)) <<
+		(HPAGE_SHIFT - PAGE_SHIFT);
+	printk("Forcing RAM used on node %ld to no more than %dMB\n",
+	       node, maxnodemem_pfn[node] >> (20 - PAGE_SHIFT));
+	return 0;
+}
+early_param("maxnodemem", setup_maxnodemem);
+
+static int __init setup_isolnodes(char *str)
+{
+	char buf[MAX_NUMNODES * 5];
+	if (str == NULL || nodelist_parse(str, isolnodes) != 0)
+		return -EINVAL;
+
+	nodelist_scnprintf(buf, sizeof(buf), isolnodes);
+	printk("Set isolnodes value to '%s'\n", buf);
+	return 0;
+}
+early_param("isolnodes", setup_isolnodes);
+
+#ifdef CONFIG_PCI
+static int __init setup_pci_reserve(char* str)
+{
+	unsigned long mb;
+
+	if (str == NULL || strict_strtoul(str, 0, &mb) != 0 ||
+	    mb > 3 * 1024)
+		return -EINVAL;
+
+	pci_reserve_mb = mb;
+	printk("Reserving %dMB for PCIE root complex mappings\n",
+	       pci_reserve_mb);
+	return 0;
+}
+early_param("pci_reserve", setup_pci_reserve);
+#endif
+
+#ifndef __tilegx__
+/*
+ * vmalloc=size forces the vmalloc area to be exactly 'size' bytes.
+ * This can be used to increase (or decrease) the vmalloc area.
+ */
+static int __init parse_vmalloc(char *arg)
+{
+	if (!arg)
+		return -EINVAL;
+
+	VMALLOC_RESERVE = (memparse(arg, &arg) + PGDIR_SIZE - 1) & PGDIR_MASK;
+
+	/* See validate_va() for more on this test. */
+	if ((long)_VMALLOC_START >= 0)
+		early_panic("\"vmalloc=%#lx\" value too large: maximum %#lx\n",
+			    VMALLOC_RESERVE, _VMALLOC_END - 0x80000000UL);
+
+	return 0;
+}
+early_param("vmalloc", parse_vmalloc);
+#endif
+
+#ifdef CONFIG_HIGHMEM
+/*
+ * Determine for each controller where its lowmem is mapped and how
+ * much of it is mapped there.  On controller zero, the first few
+ * megabytes are mapped at 0xfd000000 as code, so in principle we
+ * could start our data mappings higher up, but for now we don't
+ * bother, to avoid additional confusion.
+ *
+ * One question is whether, on systems with more than 768 Mb and
+ * controllers of different sizes, to map in a proportionate amount of
+ * each one, or to try to map the same amount from each controller.
+ * (E.g. if we have three controllers with 256MB, 1GB, and 256MB
+ * respectively, do we map 256MB from each, or do we map 128 MB, 512
+ * MB, and 128 MB respectively?)  For now we use a proportionate
+ * solution like the latter.
+ *
+ * The VA/PA mapping demands that we align our decisions at 16 MB
+ * boundaries so that we can rapidly convert VA to PA.
+ */
+static void *__init setup_pa_va_mapping(void)
+{
+	unsigned long curr_pages = 0;
+	unsigned long vaddr = PAGE_OFFSET;
+	nodemask_t highonlynodes = isolnodes;
+	int i, j;
+
+	memset(pbase_map, -1, sizeof(pbase_map));
+	memset(vbase_map, -1, sizeof(vbase_map));
+
+	/* Node zero cannot be isolated for LOWMEM purposes. */
+	node_clear(0, highonlynodes);
+
+	/* Count up the number of pages on non-highonlynodes controllers. */
+	mappable_physpages = 0;
+	for_each_online_node(i) {
+		if (!node_isset(i, highonlynodes))
+			mappable_physpages +=
+				node_end_pfn[i] - node_start_pfn[i];
+	}
+
+	for_each_online_node(i) {
+		unsigned long start = node_start_pfn[i];
+		unsigned long end = node_end_pfn[i];
+		unsigned long size = end - start;
+		unsigned long vaddr_end;
+
+		if (node_isset(i, highonlynodes)) {
+			/* Mark this controller as having no lowmem. */
+			node_lowmem_end_pfn[i] = start;
+			continue;
+		}
+
+		curr_pages += size;
+		if (mappable_physpages > MAXMEM_PFN) {
+			vaddr_end = PAGE_OFFSET +
+				(((u64)curr_pages * MAXMEM_PFN /
+				  mappable_physpages)
+				 << PAGE_SHIFT);
+		} else {
+			vaddr_end = PAGE_OFFSET + (curr_pages << PAGE_SHIFT);
+		}
+		for (j = 0; vaddr < vaddr_end; vaddr += HPAGE_SIZE, ++j) {
+			unsigned long this_pfn =
+				start + (j << HUGETLB_PAGE_ORDER);
+			pbase_map[vaddr >> HPAGE_SHIFT] = this_pfn;
+			if (vbase_map[__pfn_to_highbits(this_pfn)] ==
+			    (void *)-1)
+				vbase_map[__pfn_to_highbits(this_pfn)] =
+					(void *)(vaddr & HPAGE_MASK);
+		}
+		node_lowmem_end_pfn[i] = start + (j << HUGETLB_PAGE_ORDER);
+		BUG_ON(node_lowmem_end_pfn[i] > end);
+	}
+
+	/* Return highest address of any mapped memory. */
+	return (void *)vaddr;
+}
+#endif /* CONFIG_HIGHMEM */
+
+/*
+ * Register our most important memory mappings with the debug stub.
+ *
+ * This is up to 4 mappings for lowmem, one mapping per memory
+ * controller, plus one for our text segment.
+ */
+void __cpuinit store_permanent_mappings(void)
+{
+	int i;
+
+	for_each_online_node(i) {
+		HV_PhysAddr pa = ((HV_PhysAddr)node_start_pfn[i]) << PAGE_SHIFT;
+#ifdef CONFIG_HIGHMEM
+		HV_PhysAddr high_mapped_pa = node_lowmem_end_pfn[i];
+#else
+		HV_PhysAddr high_mapped_pa = node_end_pfn[i];
+#endif
+
+		unsigned long pages = high_mapped_pa - node_start_pfn[i];
+		HV_VirtAddr addr = (HV_VirtAddr) __va(pa);
+		hv_store_mapping(addr, pages << PAGE_SHIFT, pa);
+	}
+
+	hv_store_mapping((HV_VirtAddr)_stext,
+			 (uint32_t)(_einittext - _stext), 0);
+}
+
+/*
+ * Use hv_inquire_physical() to populate node_{start,end}_pfn[]
+ * and node_online_map, doing suitable sanity-checking.
+ * Also set min_low_pfn, max_low_pfn, and max_pfn.
+ */
+static void __init setup_memory(void)
+{
+	int i, j;
+	int highbits_seen[NR_PA_HIGHBIT_VALUES] = { 0 };
+#ifdef CONFIG_HIGHMEM
+	long highmem_pages;
+#endif
+#ifndef __tilegx__
+	int cap;
+#endif
+#if defined(CONFIG_HIGHMEM) || defined(__tilegx__)
+	long lowmem_pages;
+#endif
+
+	/* We are using a char to hold the cpu_2_node[] mapping */
+	BUG_ON(MAX_NUMNODES > 127);
+
+	/* Discover the ranges of memory available to us */
+	for (i = 0; ; ++i) {
+		unsigned long start, size, end, highbits;
+		HV_PhysAddrRange range = hv_inquire_physical(i);
+		if (range.size == 0)
+			break;
+#ifdef CONFIG_FLATMEM
+		if (i > 0) {
+			printk("Can't use discontiguous PAs: %#llx..%#llx\n",
+			       range.size, range.start + range.size);
+			continue;
+		}
+#endif
+#ifndef __tilegx__
+		if ((unsigned long)range.start) {
+			printk("Range not at 4GB multiple: %#llx..%#llx\n",
+			       range.start, range.start + range.size);
+			continue;
+		}
+#endif
+		if ((range.start & (HPAGE_SIZE-1)) != 0 ||
+		    (range.size & (HPAGE_SIZE-1)) != 0) {
+			unsigned long long start_pa = range.start;
+			unsigned long long size = range.size;
+			range.start = (start_pa + HPAGE_SIZE - 1) & HPAGE_MASK;
+			range.size -= (range.start - start_pa);
+			range.size &= HPAGE_MASK;
+			printk("Range not hugepage-aligned: %#llx..%#llx:"
+			       " now %#llx-%#llx\n",
+			       start_pa, start_pa + size,
+			       range.start, range.start + range.size);
+		}
+		highbits = __pa_to_highbits(range.start);
+		if (highbits >= NR_PA_HIGHBIT_VALUES) {
+			printk("PA high bits too high: %#llx..%#llx\n",
+			       range.start, range.start + range.size);
+			continue;
+		}
+		if (highbits_seen[highbits]) {
+			printk("Range overlaps in high bits: %#llx..%#llx\n",
+			       range.start, range.start + range.size);
+			continue;
+		}
+		highbits_seen[highbits] = 1;
+		if (PFN_DOWN(range.size) > maxnodemem_pfn[i]) {
+			int size = maxnodemem_pfn[i];
+			if (size > 0) {
+				printk("Maxnodemem reduced node %d to"
+				       " %d pages\n", i, size);
+				range.size = (HV_PhysAddr)size << PAGE_SHIFT;
+			} else {
+				printk("Maxnodemem disabled node %d\n", i);
+				continue;
+			}
+		}
+		if (num_physpages + PFN_DOWN(range.size) > maxmem_pfn) {
+			int size = maxmem_pfn - num_physpages;
+			if (size > 0) {
+				printk("Maxmem reduced node %d to %d pages\n",
+				       i, size);
+				range.size = (HV_PhysAddr)size << PAGE_SHIFT;
+			} else {
+				printk("Maxmem disabled node %d\n", i);
+				continue;
+			}
+		}
+		if (i >= MAX_NUMNODES) {
+			printk("Too many PA nodes (#%d): %#llx...%#llx\n",
+			       i, range.size, range.size + range.start);
+			continue;
+		}
+
+		start = range.start >> PAGE_SHIFT;
+		size = range.size >> PAGE_SHIFT;
+		end = start + size;
+
+#ifndef __tilegx__
+		if (((HV_PhysAddr)end << PAGE_SHIFT) !=
+		    (range.start + range.size)) {
+			printk("PAs too high to represent: %#llx..%#llx\n",
+			       range.start, range.start + range.size);
+			continue;
+		}
+#endif
+#ifdef CONFIG_PCI
+		/*
+		 * Blocks that overlap the pci reserved region must
+		 * have enough space to hold the maximum percpu data
+		 * region at the top of the range.  If there isn't
+		 * enough space above the reserved region, just
+		 * truncate the node.
+		 */
+		if (start <= pci_reserve_start_pfn &&
+		    end > pci_reserve_start_pfn) {
+			unsigned int per_cpu_size =
+				__per_cpu_end - __per_cpu_start;
+			unsigned int percpu_pages =
+				NR_CPUS * (PFN_UP(per_cpu_size) >> PAGE_SHIFT);
+			if (end < pci_reserve_end_pfn + percpu_pages) {
+				end = pci_reserve_start_pfn;
+				printk("PCI mapping region reduced node %d to"
+				       " %ld pages\n", i, end - start);
+			}
+		}
+#endif
+
+		for (j = __pfn_to_highbits(start);
+		     j <= __pfn_to_highbits(end - 1); j++)
+			highbits_to_node[j] = i;
+
+		node_start_pfn[i] = start;
+		node_end_pfn[i] = end;
+		node_controller[i] = range.controller;
+		num_physpages += size;
+		max_pfn = end;
+
+		/* Mark node as online */
+		node_set(i, node_online_map);
+		node_set(i, node_possible_map);
+	}
+
+#ifndef __tilegx__
+	/*
+	 * For 4KB pages, mem_map "struct page" data is 1% of the size
+	 * of the physical memory, so can be quite big (640 MB for
+	 * four 16G zones).  These structures must be mapped in
+	 * lowmem, and since we currently cap out at about 768 MB,
+	 * it's impractical to try to use this much address space.
+	 * For now, arbitrarily cap the amount of physical memory
+	 * we're willing to use at 8 million pages (32GB of 4KB pages).
+	 */
+	cap = 8 * 1024 * 1024;  /* 8 million pages */
+	if (num_physpages > cap) {
+		int num_nodes = num_online_nodes();
+		int cap_each = cap / num_nodes;
+		unsigned long dropped_pages = 0;
+		for (i = 0; i < num_nodes; ++i) {
+			int size = node_end_pfn[i] - node_start_pfn[i];
+			if (size > cap_each) {
+				dropped_pages += (size - cap_each);
+				node_end_pfn[i] = node_start_pfn[i] + cap_each;
+			}
+		}
+		num_physpages -= dropped_pages;
+		printk(KERN_WARNING "Only using %ldMB memory;"
+		       " ignoring %ldMB.\n",
+		       num_physpages >> (20 - PAGE_SHIFT),
+		       dropped_pages >> (20 - PAGE_SHIFT));
+		printk(KERN_WARNING "Consider using a larger page size.\n");
+	}
+#endif
+
+	/* Heap starts just above the last loaded address. */
+	min_low_pfn = PFN_UP((unsigned long)_end - PAGE_OFFSET);
+
+#ifdef CONFIG_HIGHMEM
+	/* Find where we map lowmem from each controller. */
+	high_memory = setup_pa_va_mapping();
+
+	/* Set max_low_pfn based on what node 0 can directly address. */
+	max_low_pfn = node_lowmem_end_pfn[0];
+
+	lowmem_pages = (mappable_physpages > MAXMEM_PFN) ?
+		MAXMEM_PFN : mappable_physpages;
+	highmem_pages = (long) (num_physpages - lowmem_pages);
+
+	printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
+	       pages_to_mb(highmem_pages > 0 ? highmem_pages : 0));
+	printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
+			pages_to_mb(lowmem_pages));
+#else
+	/* Set max_low_pfn based on what node 0 can directly address. */
+	max_low_pfn = node_end_pfn[0];
+
+#ifndef __tilegx__
+	if (node_end_pfn[0] > MAXMEM_PFN) {
+		printk(KERN_WARNING "Only using %ldMB LOWMEM.\n",
+		       MAXMEM>>20);
+		printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
+		max_low_pfn = MAXMEM_PFN;
+		max_pfn = MAXMEM_PFN;
+		num_physpages = MAXMEM_PFN;
+		node_end_pfn[0] = MAXMEM_PFN;
+	} else {
+		printk(KERN_NOTICE "%ldMB memory available.\n",
+		       pages_to_mb(node_end_pfn[0]));
+	}
+	for (i = 1; i < MAX_NUMNODES; ++i) {
+		node_start_pfn[i] = 0;
+		node_end_pfn[i] = 0;
+	}
+	high_memory = __va(node_end_pfn[0]);
+#else
+	lowmem_pages = 0;
+	for (i = 0; i < MAX_NUMNODES; ++i) {
+		int pages = node_end_pfn[i] - node_start_pfn[i];
+		lowmem_pages += pages;
+		if (pages)
+			high_memory = pfn_to_kaddr(node_end_pfn[i]);
+	}
+	printk(KERN_NOTICE "%ldMB memory available.\n",
+	       pages_to_mb(lowmem_pages));
+#endif
+#endif
+}
+
+static void __init setup_bootmem_allocator(void)
+{
+	unsigned long bootmap_size, first_alloc_pfn, last_alloc_pfn;
+
+	/* Provide a node 0 bdata. */
+	NODE_DATA(0)->bdata = &node0_bdata;
+
+#ifdef CONFIG_PCI
+	/* Don't let boot memory alias the PCI region. */
+	last_alloc_pfn = min(max_low_pfn, pci_reserve_start_pfn);
+#else
+	last_alloc_pfn = max_low_pfn;
+#endif
+
+	/*
+	 * Initialize the boot-time allocator (with low memory only):
+	 * The first argument says where to put the bitmap, and the
+	 * second says where the end of allocatable memory is.
+	 */
+	bootmap_size = init_bootmem(min_low_pfn, last_alloc_pfn);
+
+	/*
+	 * Let the bootmem allocator use all the space we've given it
+	 * except for its own bitmap.
+	 */
+	first_alloc_pfn = min_low_pfn + PFN_UP(bootmap_size);
+	if (first_alloc_pfn >= last_alloc_pfn)
+		early_panic("Not enough memory on controller 0 for bootmem\n");
+
+	free_bootmem(PFN_PHYS(first_alloc_pfn),
+		     PFN_PHYS(last_alloc_pfn - first_alloc_pfn));
+
+#ifdef CONFIG_KEXEC
+	if (crashk_res.start != crashk_res.end)
+		reserve_bootmem(crashk_res.start,
+			crashk_res.end - crashk_res.start + 1, 0);
+#endif
+
+}
+
+void *__init alloc_remap(int nid, unsigned long size)
+{
+	int pages = node_end_pfn[nid] - node_start_pfn[nid];
+	void *map = pfn_to_kaddr(node_memmap_pfn[nid]);
+	BUG_ON(size != pages * sizeof(struct page));
+	memset(map, 0, size);
+	return map;
+}
+
+static int __init percpu_size(void)
+{
+	int size = ALIGN(__per_cpu_end - __per_cpu_start, PAGE_SIZE);
+#ifdef CONFIG_MODULES
+	if (size < PERCPU_ENOUGH_ROOM)
+		size = PERCPU_ENOUGH_ROOM;
+#endif
+	/* In several places we assume the per-cpu data fits on a huge page. */
+	BUG_ON(kdata_huge && size > HPAGE_SIZE);
+	return size;
+}
+
+static inline unsigned long alloc_bootmem_pfn(int size, unsigned long goal)
+{
+	void *kva = __alloc_bootmem(size, PAGE_SIZE, goal);
+	unsigned long pfn = kaddr_to_pfn(kva);
+	BUG_ON(goal && PFN_PHYS(pfn) != goal);
+	return pfn;
+}
+
+static void __init zone_sizes_init(void)
+{
+	unsigned long zones_size[MAX_NR_ZONES] = { 0 };
+	unsigned long node_percpu[MAX_NUMNODES] = { 0 };
+	int size = percpu_size();
+	int num_cpus = smp_height * smp_width;
+	int i;
+
+	for (i = 0; i < num_cpus; ++i)
+		node_percpu[cpu_to_node(i)] += size;
+
+	for_each_online_node(i) {
+		unsigned long start = node_start_pfn[i];
+		unsigned long end = node_end_pfn[i];
+#ifdef CONFIG_HIGHMEM
+		unsigned long lowmem_end = node_lowmem_end_pfn[i];
+#else
+		unsigned long lowmem_end = end;
+#endif
+		int memmap_size = (end - start) * sizeof(struct page);
+		node_free_pfn[i] = start;
+
+		/*
+		 * Set aside pages for per-cpu data and the mem_map array.
+		 *
+		 * Since the per-cpu data requires special homecaching,
+		 * if we are in kdata_huge mode, we put it at the end of
+		 * the lowmem region.  If we're not in kdata_huge mode,
+		 * we take the per-cpu pages from the bottom of the
+		 * controller, since that avoids fragmenting a huge page
+		 * that users might want.  We always take the memmap
+		 * from the bottom of the controller, since with
+		 * kdata_huge that lets it be under a huge TLB entry.
+		 *
+		 * If the user has requested isolnodes for a controller,
+		 * though, there'll be no lowmem, so we just alloc_bootmem
+		 * the memmap.  There will be no percpu memory either.
+		 */
+		if (__pfn_to_highbits(start) == 0) {
+			/* In low PAs, allocate via bootmem. */
+			unsigned long goal = 0;
+			node_memmap_pfn[i] =
+				alloc_bootmem_pfn(memmap_size, goal);
+			if (kdata_huge)
+				goal = PFN_PHYS(lowmem_end) - node_percpu[i];
+			if (node_percpu[i])
+				node_percpu_pfn[i] =
+				    alloc_bootmem_pfn(node_percpu[i], goal);
+		} else if (cpu_isset(i, isolnodes)) {
+			node_memmap_pfn[i] = alloc_bootmem_pfn(memmap_size, 0);
+			BUG_ON(node_percpu[i] != 0);
+		} else {
+			/* In high PAs, just reserve some pages. */
+			node_memmap_pfn[i] = node_free_pfn[i];
+			node_free_pfn[i] += PFN_UP(memmap_size);
+			if (!kdata_huge) {
+				node_percpu_pfn[i] = node_free_pfn[i];
+				node_free_pfn[i] += PFN_UP(node_percpu[i]);
+			} else {
+				node_percpu_pfn[i] =
+					lowmem_end - PFN_UP(node_percpu[i]);
+			}
+		}
+
+#ifdef CONFIG_HIGHMEM
+		if (start > lowmem_end) {
+			zones_size[ZONE_NORMAL] = 0;
+			zones_size[ZONE_HIGHMEM] = end - start;
+		} else {
+			zones_size[ZONE_NORMAL] = lowmem_end - start;
+			zones_size[ZONE_HIGHMEM] = end - lowmem_end;
+		}
+#else
+		zones_size[ZONE_NORMAL] = end - start;
+#endif
+
+		/*
+		 * Everyone shares node 0's bootmem allocator, but
+		 * we use alloc_remap(), above, to put the actual
+		 * struct page array on the individual controllers,
+		 * which is most of the data that we actually care about.
+		 * We can't place bootmem allocators on the other
+		 * controllers since the bootmem allocator can only
+		 * operate on 32-bit physical addresses.
+		 */
+		NODE_DATA(i)->bdata = NODE_DATA(0)->bdata;
+
+		free_area_init_node(i, zones_size, start, NULL);
+		printk(KERN_DEBUG "  DMA zone: %ld per-cpu pages\n",
+		       PFN_UP(node_percpu[i]));
+
+		/* Track the type of memory on each node */
+		if (zones_size[ZONE_NORMAL])
+			node_set_state(i, N_NORMAL_MEMORY);
+#ifdef CONFIG_HIGHMEM
+		if (end != start)
+			node_set_state(i, N_HIGH_MEMORY);
+#endif
+
+		node_set_online(i);
+	}
+}
+
+#ifdef CONFIG_NUMA
+
+/* which logical CPUs are on which nodes */
+struct cpumask node_2_cpu_mask[MAX_NUMNODES] __write_once;
+EXPORT_SYMBOL(node_2_cpu_mask);
+
+/* which node each logical CPU is on */
+char cpu_2_node[NR_CPUS] __write_once __attribute__((aligned(L2_CACHE_BYTES)));
+EXPORT_SYMBOL(cpu_2_node);
+
+/* Return cpu_to_node() except for cpus not yet assigned, which return -1 */
+static int __init cpu_to_bound_node(int cpu, struct cpumask* unbound_cpus)
+{
+	if (!cpu_possible(cpu) || cpumask_test_cpu(cpu, unbound_cpus))
+		return -1;
+	else
+		return cpu_to_node(cpu);
+}
+
+/* Return number of immediately-adjacent tiles sharing the same NUMA node. */
+static int __init node_neighbors(int node, int cpu,
+				 struct cpumask *unbound_cpus)
+{
+	int neighbors = 0;
+	int w = smp_width;
+	int h = smp_height;
+	int x = cpu % w;
+	int y = cpu / w;
+	if (x > 0 && cpu_to_bound_node(cpu-1, unbound_cpus) == node)
+		++neighbors;
+	if (x < w-1 && cpu_to_bound_node(cpu+1, unbound_cpus) == node)
+		++neighbors;
+	if (y > 0 && cpu_to_bound_node(cpu-w, unbound_cpus) == node)
+		++neighbors;
+	if (y < h-1 && cpu_to_bound_node(cpu+w, unbound_cpus) == node)
+		++neighbors;
+	return neighbors;
+}
+
+static void __init setup_numa_mapping(void)
+{
+	int distance[MAX_NUMNODES][NR_CPUS];
+	HV_Coord coord;
+	int cpu, node, cpus, i, x, y;
+	int num_nodes = num_online_nodes();
+	struct cpumask unbound_cpus;
+	nodemask_t default_nodes;
+
+	cpumask_clear(&unbound_cpus);
+
+	/* Get set of nodes we will use for defaults */
+	nodes_andnot(default_nodes, node_online_map, isolnodes);
+	if (nodes_empty(default_nodes)) {
+		BUG_ON(!node_isset(0, node_online_map));
+		printk("Forcing NUMA node zero available as a default node\n");
+		node_set(0, default_nodes);
+	}
+
+	/* Populate the distance[] array */
+	memset(distance, -1, sizeof(distance));
+	cpu = 0;
+	for (coord.y = 0; coord.y < smp_height; ++coord.y) {
+		for (coord.x = 0; coord.x < smp_width;
+		     ++coord.x, ++cpu) {
+			BUG_ON(cpu >= nr_cpu_ids);
+			if (!cpu_possible(cpu)) {
+				cpu_2_node[cpu] = -1;
+				continue;
+			}
+			for_each_node_mask(node, default_nodes) {
+				HV_MemoryControllerInfo info =
+					hv_inquire_memory_controller(
+						coord, node_controller[node]);
+				distance[node][cpu] =
+					ABS(info.coord.x) + ABS(info.coord.y);
+			}
+			cpumask_set_cpu(cpu, &unbound_cpus);
+		}
+	}
+	cpus = cpu;
+
+	/*
+	 * Round-robin through the NUMA nodes until all the cpus are
+	 * assigned.  We could be more clever here (e.g. create four
+	 * sorted linked lists on the same set of cpu nodes, and pull
+	 * off them in round-robin sequence, removing from all four
+	 * lists each time) but given the relatively small numbers
+	 * involved, O(n^2) seem OK for a one-time cost.
+	 */
+	node = first_node(default_nodes);
+	while (!cpumask_empty(&unbound_cpus)) {
+		int best_cpu = -1;
+		int best_distance = INT_MAX;
+		for (cpu = 0; cpu < cpus; ++cpu) {
+			if (cpumask_test_cpu(cpu, &unbound_cpus)) {
+				/*
+				 * Compute metric, which is how much
+				 * closer the cpu is to this memory
+				 * controller than the others, shifted
+				 * up, and then the number of
+				 * neighbors already in the node as an
+				 * epsilon adjustment to try to keep
+				 * the nodes compact.
+				 */
+				int d = distance[node][cpu] * num_nodes;
+				for_each_node_mask(i, default_nodes) {
+					if (i != node)
+						d -= distance[i][cpu];
+				}
+				d *= 8;  /* allow space for epsilon */
+				d -= node_neighbors(node, cpu, &unbound_cpus);
+				if (d < best_distance) {
+					best_cpu = cpu;
+					best_distance = d;
+				}
+			}
+		}
+		BUG_ON(best_cpu < 0);
+		cpumask_set_cpu(best_cpu, &node_2_cpu_mask[node]);
+		cpu_2_node[best_cpu] = node;
+		cpumask_clear_cpu(best_cpu, &unbound_cpus);
+		node = next_node(node, default_nodes);
+		if (node == MAX_NUMNODES)
+			node = first_node(default_nodes);
+	}
+
+	/* Print out node assignments and set defaults for disabled cpus */
+	cpu = 0;
+	for (y = 0; y < smp_height; ++y) {
+		printk(KERN_DEBUG "NUMA cpu-to-node row %d:", y);
+		for (x = 0; x < smp_width; ++x, ++cpu) {
+			if (cpu_to_node(cpu) < 0) {
+				printk(" -");
+				cpu_2_node[cpu] = first_node(default_nodes);
+			} else {
+				printk(" %d", cpu_to_node(cpu));
+			}
+		}
+		printk("\n");
+	}
+}
+
+static struct cpu cpu_devices[NR_CPUS];
+
+static int __init topology_init(void)
+{
+	int i;
+
+	for_each_online_node(i)
+		register_one_node(i);
+
+	for_each_present_cpu(i)
+		register_cpu(&cpu_devices[i], i);
+
+	return 0;
+}
+
+subsys_initcall(topology_init);
+
+#else /* !CONFIG_NUMA */
+
+#define setup_numa_mapping() do { } while (0)
+
+#endif /* CONFIG_NUMA */
+
+/**
+ * setup_mpls() - Allow the user-space code to access various SPRs.
+ *
+ * Also called from online_secondary().
+ */
+void __cpuinit setup_mpls(void)
+{
+	/* Allow asynchronous TLB interrupts. */
+#if CHIP_HAS_TILE_DMA()
+	raw_local_irq_unmask(INT_DMATLB_MISS);
+	raw_local_irq_unmask(INT_DMATLB_ACCESS);
+#endif
+#if CHIP_HAS_SN_PROC()
+	raw_local_irq_unmask(INT_SNITLB_MISS);
+#endif
+
+	/*
+	 * Allow user access to many generic SPRs, like the cycle
+	 * counter, PASS/FAIL/DONE, INTERRUPT_CRITICAL_SECTION, etc.
+	 */
+	__insn_mtspr(SPR_MPL_WORLD_ACCESS_SET_0, 1);
+
+#if CHIP_HAS_SN()
+	/* Static network is not restricted. */
+	__insn_mtspr(SPR_MPL_SN_ACCESS_SET_0, 1);
+#endif
+#if CHIP_HAS_SN_PROC()
+	__insn_mtspr(SPR_MPL_SN_NOTIFY_SET_0, 1);
+	__insn_mtspr(SPR_MPL_SN_CPL_SET_0, 1);
+#endif
+
+	/*
+	 * Set the MPL for interrupt control 0 to user level.
+	 * This includes access to the SYSTEM_SAVE and EX_CONTEXT SPRs,
+	 * as well as the PL 0 interrupt mask.
+	 */
+	__insn_mtspr(SPR_MPL_INTCTRL_0_SET_0, 1);
+}
+
+static int __initdata set_initramfs_file;
+static char __initdata initramfs_file[128] = "initramfs.cpio.gz";
+
+static int __init setup_initramfs_file(char *str)
+{
+	if (str == NULL)
+		return -EINVAL;
+	strncpy(initramfs_file, str, sizeof(initramfs_file) - 1);
+	set_initramfs_file = 1;
+
+	return 0;
+}
+early_param("initramfs_file", setup_initramfs_file);
+
+/*
+ * We look for an additional "initramfs.cpio.gz" file in the hvfs.
+ * If there is one, we allocate some memory for it and it will be
+ * unpacked to the initramfs after any built-in initramfs_data.
+ */
+static void __init load_hv_initrd(void)
+{
+	HV_FS_StatInfo stat;
+	int fd, rc;
+	void *initrd;
+
+	fd = hv_fs_findfile((HV_VirtAddr) initramfs_file);
+	if (fd == HV_ENOENT) {
+		if (set_initramfs_file)
+			printk("No such hvfs initramfs file '%s'\n",
+			       initramfs_file);
+		return;
+	}
+	BUG_ON(fd < 0);
+	stat = hv_fs_fstat(fd);
+	BUG_ON(stat.size < 0);
+	if (stat.flags & HV_FS_ISDIR) {
+		printk("Ignoring hvfs file '%s': it's a directory.\n",
+		       initramfs_file);
+		return;
+	}
+	initrd = alloc_bootmem_pages(stat.size);
+	rc = hv_fs_pread(fd, (HV_VirtAddr) initrd, stat.size, 0);
+	if (rc != stat.size) {
+		printk("Error reading %d bytes from hvfs file '%s': %d\n",
+		       stat.size, initramfs_file, rc);
+		free_bootmem((unsigned long) initrd, stat.size);
+		return;
+	}
+	initrd_start = (unsigned long) initrd;
+	initrd_end = initrd_start + stat.size;
+}
+
+void __init free_initrd_mem(unsigned long begin, unsigned long end)
+{
+	free_bootmem(begin, end - begin);
+}
+
+static void __init validate_hv(void)
+{
+	/*
+	 * It may already be too late, but let's check our built-in
+	 * configuration against what the hypervisor is providing.
+	 */
+	unsigned long glue_size = hv_sysconf(HV_SYSCONF_GLUE_SIZE);
+	int hv_page_size = hv_sysconf(HV_SYSCONF_PAGE_SIZE_SMALL);
+	int hv_hpage_size = hv_sysconf(HV_SYSCONF_PAGE_SIZE_LARGE);
+	HV_ASIDRange asid_range;
+
+#ifndef CONFIG_SMP
+	HV_Topology topology = hv_inquire_topology();
+	BUG_ON(topology.coord.x != 0 || topology.coord.y != 0);
+	if (topology.width != 1 || topology.height != 1) {
+		printk("Warning: booting UP kernel on %dx%d grid;"
+		       " will ignore all but first tile.\n",
+		       topology.width, topology.height);
+	}
+#endif
+
+	if (PAGE_OFFSET + HV_GLUE_START_CPA + glue_size > (unsigned long)_text)
+		early_panic("Hypervisor glue size %ld is too big!\n",
+			    glue_size);
+	if (hv_page_size != PAGE_SIZE)
+		early_panic("Hypervisor page size %#x != our %#lx\n",
+			    hv_page_size, PAGE_SIZE);
+	if (hv_hpage_size != HPAGE_SIZE)
+		early_panic("Hypervisor huge page size %#x != our %#lx\n",
+			    hv_hpage_size, HPAGE_SIZE);
+
+#ifdef CONFIG_SMP
+	/*
+	 * Some hypervisor APIs take a pointer to a bitmap array
+	 * whose size is at least the number of cpus on the chip.
+	 * We use a struct cpumask for this, so it must be big enough.
+	 */
+	if ((smp_height * smp_width) > nr_cpu_ids)
+		early_panic("Hypervisor %d x %d grid too big for Linux"
+			    " NR_CPUS %d\n", smp_height, smp_width,
+			    nr_cpu_ids);
+#endif
+
+	/*
+	 * Check that we're using allowed ASIDs, and initialize the
+	 * various asid variables to their appropriate initial states.
+	 */
+	asid_range = hv_inquire_asid(0);
+	__get_cpu_var(current_asid) = min_asid = asid_range.start;
+	max_asid = asid_range.start + asid_range.size - 1;
+
+	if (hv_confstr(HV_CONFSTR_CHIP_MODEL, (HV_VirtAddr)chip_model,
+		       sizeof(chip_model)) < 0) {
+		printk("Warning: HV_CONFSTR_CHIP_MODEL not available\n");
+		strlcpy(chip_model, "unknown", sizeof(chip_model));
+	}
+}
+
+static void __init validate_va(void)
+{
+#ifndef __tilegx__   /* FIXME: GX: probably some validation relevant here */
+	/*
+	 * Similarly, make sure we're only using allowed VAs.
+	 * We assume we can contiguously use MEM_USER_INTRPT .. MEM_HV_INTRPT,
+	 * and 0 .. KERNEL_HIGH_VADDR.
+	 * In addition, make sure we CAN'T use the end of memory, since
+	 * we use the last chunk of each pgd for the pgd_list.
+	 */
+	int i, fc_fd_ok = 0;
+	unsigned long max_va = 0;
+	unsigned long list_va =
+		((PGD_LIST_OFFSET / sizeof(pgd_t)) << PGDIR_SHIFT);
+
+	for (i = 0; ; ++i) {
+		HV_VirtAddrRange range = hv_inquire_virtual(i);
+		if (range.size == 0)
+			break;
+		if (range.start <= MEM_USER_INTRPT &&
+		    range.start + range.size >= MEM_HV_INTRPT)
+			fc_fd_ok = 1;
+		if (range.start == 0)
+			max_va = range.size;
+		BUG_ON(range.start + range.size > list_va);
+	}
+	if (!fc_fd_ok)
+		early_panic("Hypervisor not configured for VAs 0xfc/0xfd\n");
+	if (max_va == 0)
+		early_panic("Hypervisor not configured for low VAs\n");
+	if (max_va < KERNEL_HIGH_VADDR)
+		early_panic("Hypervisor max VA %#lx smaller than %#lx\n",
+			    max_va, KERNEL_HIGH_VADDR);
+
+	/* Kernel PCs must have their high bit set; see intvec.S. */
+	if ((long)VMALLOC_START >= 0)
+		early_panic(
+			"Linux VMALLOC region below the 2GB line (%#lx)!\n"
+			"Reconfigure the kernel with fewer NR_HUGE_VMAPS\n"
+			"or smaller VMALLOC_RESERVE.\n",
+			VMALLOC_START);
+#endif
+}
+
+/*
+ * cpu_lotar_map lists all the cpus that are valid for the supervisor
+ * to cache data on at a page level, i.e. what cpus can be placed in
+ * the LOTAR field of a PTE.  It is equivalent to the set of possible
+ * cpus plus any other cpus that are willing to share their cache.
+ * It is set by hv_inquire_tiles(HV_INQ_TILES_LOTAR).
+ */
+struct cpumask __write_once cpu_lotar_map;
+EXPORT_SYMBOL(cpu_lotar_map);
+
+#if CHIP_HAS_CBOX_HOME_MAP()
+/*
+ * hash_for_home_map lists all the tiles that hash-for-home data
+ * will be cached on.  Note that this may includes tiles that are not
+ * valid for this supervisor to use otherwise (e.g. if a hypervisor
+ * device is being shared between multiple supervisors).
+ * It is set by hv_inquire_tiles(HV_INQ_TILES_HFH_CACHE).
+ */
+struct cpumask hash_for_home_map;
+EXPORT_SYMBOL(hash_for_home_map);
+#endif
+
+/*
+ * cpu_cacheable_map lists all the cpus whose caches the hypervisor can
+ * flush on our behalf.  It is set to cpu_possible_map OR'ed with
+ * hash_for_home_map, and it is what should be passed to
+ * hv_flush_remote() to flush all caches.  Note that if there are
+ * dedicated hypervisor driver tiles that have authorized use of their
+ * cache, those tiles will only appear in cpu_lotar_map, NOT in
+ * cpu_cacheable_map, as they are a special case.
+ */
+struct cpumask __write_once cpu_cacheable_map;
+EXPORT_SYMBOL(cpu_cacheable_map);
+
+static __initdata struct cpumask disabled_map;
+
+static int __init disabled_cpus(char *str)
+{
+	int boot_cpu = smp_processor_id();
+
+	if (str == NULL || cpulist_parse_crop(str, &disabled_map) != 0)
+		return -EINVAL;
+	if (cpumask_test_cpu(boot_cpu, &disabled_map)) {
+		printk("disabled_cpus: can't disable boot cpu %d\n", boot_cpu);
+		cpumask_clear_cpu(boot_cpu, &disabled_map);
+	}
+	return 0;
+}
+
+early_param("disabled_cpus", disabled_cpus);
+
+void __init print_disabled_cpus()
+{
+	if (!cpumask_empty(&disabled_map)) {
+		char buf[100];
+		cpulist_scnprintf(buf, sizeof(buf), &disabled_map);
+		printk(KERN_INFO "CPUs not available for Linux: %s\n", buf);
+	}
+}
+
+static void __init setup_cpu_maps(void)
+{
+	struct cpumask hv_disabled_map, cpu_possible_init;
+	int boot_cpu = smp_processor_id();
+	int cpus, i, rc;
+
+	/* Learn which cpus are allowed by the hypervisor. */
+	rc = hv_inquire_tiles(HV_INQ_TILES_AVAIL,
+			      (HV_VirtAddr) cpumask_bits(&cpu_possible_init),
+			      sizeof(cpu_cacheable_map));
+	if (rc < 0)
+		early_panic("hv_inquire_tiles(AVAIL) failed: rc %d\n", rc);
+	if (!cpumask_test_cpu(boot_cpu, &cpu_possible_init))
+		early_panic("Boot CPU %d disabled by hypervisor!\n", boot_cpu);
+
+	/* Compute the cpus disabled by the hvconfig file. */
+	cpumask_complement(&hv_disabled_map, &cpu_possible_init);
+
+	/* Include them with the cpus disabled by "disabled_cpus". */
+	cpumask_or(&disabled_map, &disabled_map, &hv_disabled_map);
+
+	/*
+	 * Disable every cpu after "setup_max_cpus".  But don't mark
+	 * as disabled the cpus that are outside of our initial rectangle,
+	 * since that turns out to be confusing.
+	 */
+	cpus = 1;                          /* this cpu */
+	cpumask_set_cpu(boot_cpu, &disabled_map);   /* ignore this cpu */
+	for (i = 0; cpus < setup_max_cpus; ++i)
+		if (!cpumask_test_cpu(i, &disabled_map))
+			++cpus;
+	for (; i < smp_height * smp_width; ++i)
+		cpumask_set_cpu(i, &disabled_map);
+	cpumask_clear_cpu(boot_cpu, &disabled_map); /* reset this cpu */
+	for (i = smp_height * smp_width; i < NR_CPUS; ++i)
+		cpumask_clear_cpu(i, &disabled_map);
+
+	/*
+	 * Setup cpu_possible map as every cpu allocated to us, minus
+	 * the results of any "disabled_cpus" settings.
+	 */
+	cpumask_andnot(&cpu_possible_init, &cpu_possible_init, &disabled_map);
+	init_cpu_possible(&cpu_possible_init);
+
+	/* Learn which cpus are valid for LOTAR caching. */
+	rc = hv_inquire_tiles(HV_INQ_TILES_LOTAR,
+			      (HV_VirtAddr) cpumask_bits(&cpu_lotar_map),
+			      sizeof(cpu_lotar_map));
+	if (rc < 0) {
+		printk("warning: no HV_INQ_TILES_LOTAR; using AVAIL\n");
+		cpu_lotar_map = cpu_possible_map;
+	}
+
+#if CHIP_HAS_CBOX_HOME_MAP()
+	/* Retrieve set of CPUs used for hash-for-home caching */
+	rc = hv_inquire_tiles(HV_INQ_TILES_HFH_CACHE,
+			      (HV_VirtAddr) hash_for_home_map.bits,
+			      sizeof(hash_for_home_map));
+	if (rc < 0)
+		early_panic("hv_inquire_tiles(HFH_CACHE) failed: rc %d\n", rc);
+	cpumask_or(&cpu_cacheable_map, &cpu_possible_map, &hash_for_home_map);
+#else
+	cpu_cacheable_map = cpu_possible_map;
+#endif
+}
+
+
+static int __init dataplane(char *str)
+{
+	printk("WARNING: dataplane support disabled in this kernel\n");
+	return 0;
+}
+
+early_param("dataplane", dataplane);
+
+#ifdef CONFIG_CMDLINE_BOOL
+static char __initdata builtin_cmdline[COMMAND_LINE_SIZE] = CONFIG_CMDLINE;
+#endif
+
+void __init setup_arch(char **cmdline_p)
+{
+	int len;
+
+#if defined(CONFIG_CMDLINE_BOOL) && defined(CONFIG_CMDLINE_OVERRIDE)
+	len = hv_get_command_line((HV_VirtAddr) boot_command_line,
+				  COMMAND_LINE_SIZE);
+	if (boot_command_line[0])
+		printk("WARNING: ignoring dynamic command line \"%s\"\n",
+		       boot_command_line);
+	strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
+#else
+	char *hv_cmdline;
+#if defined(CONFIG_CMDLINE_BOOL)
+	if (builtin_cmdline[0]) {
+		int builtin_len = strlcpy(boot_command_line, builtin_cmdline,
+					  COMMAND_LINE_SIZE);
+		if (builtin_len < COMMAND_LINE_SIZE-1)
+			boot_command_line[builtin_len++] = ' ';
+		hv_cmdline = &boot_command_line[builtin_len];
+		len = COMMAND_LINE_SIZE - builtin_len;
+	} else
+#endif
+	{
+		hv_cmdline = boot_command_line;
+		len = COMMAND_LINE_SIZE;
+	}
+	len = hv_get_command_line((HV_VirtAddr) hv_cmdline, len);
+	if (len < 0 || len > COMMAND_LINE_SIZE)
+		early_panic("hv_get_command_line failed: %d\n", len);
+#endif
+
+	*cmdline_p = boot_command_line;
+
+	/* Set disabled_map and setup_max_cpus very early */
+	parse_early_param();
+
+	/* Make sure the kernel is compatible with the hypervisor. */
+	validate_hv();
+	validate_va();
+
+	setup_cpu_maps();
+
+
+#ifdef CONFIG_PCI
+	/*
+	 * Initialize the PCI structures.  This is done before memory
+	 * setup so that we know whether or not a pci_reserve region
+	 * is necessary.
+	 */
+	if (tile_pci_init() == 0)
+		pci_reserve_mb = 0;
+
+	/* PCI systems reserve a region just below 4GB for mapping iomem. */
+	pci_reserve_end_pfn  = (1 << (32 - PAGE_SHIFT));
+	pci_reserve_start_pfn = pci_reserve_end_pfn -
+		(pci_reserve_mb << (20 - PAGE_SHIFT));
+#endif
+
+	init_mm.start_code = (unsigned long) _text;
+	init_mm.end_code = (unsigned long) _etext;
+	init_mm.end_data = (unsigned long) _edata;
+	init_mm.brk = (unsigned long) _end;
+
+	setup_memory();
+	store_permanent_mappings();
+	setup_bootmem_allocator();
+
+	/*
+	 * NOTE: before this point _nobody_ is allowed to allocate
+	 * any memory using the bootmem allocator.
+	 */
+
+	paging_init();
+	setup_numa_mapping();
+	zone_sizes_init();
+	set_page_homes();
+	setup_mpls();
+	setup_clock();
+	load_hv_initrd();
+}
+
+
+/*
+ * Set up per-cpu memory.
+ */
+
+unsigned long __per_cpu_offset[NR_CPUS] __write_once;
+EXPORT_SYMBOL(__per_cpu_offset);
+
+static size_t __initdata pfn_offset[MAX_NUMNODES] = { 0 };
+static unsigned long __initdata percpu_pfn[NR_CPUS] = { 0 };
+
+/*
+ * As the percpu code allocates pages, we return the pages from the
+ * end of the node for the specified cpu.
+ */
+static void *__init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align)
+{
+	int nid = cpu_to_node(cpu);
+	unsigned long pfn = node_percpu_pfn[nid] + pfn_offset[nid];
+
+	BUG_ON(size % PAGE_SIZE != 0);
+	pfn_offset[nid] += size / PAGE_SIZE;
+	if (percpu_pfn[cpu] == 0)
+		percpu_pfn[cpu] = pfn;
+	return pfn_to_kaddr(pfn);
+}
+
+/*
+ * Pages reserved for percpu memory are not freeable, and in any case we are
+ * on a short path to panic() in setup_per_cpu_area() at this point anyway.
+ */
+static void __init pcpu_fc_free(void *ptr, size_t size)
+{
+}
+
+/*
+ * Set up vmalloc page tables using bootmem for the percpu code.
+ */
+static void __init pcpu_fc_populate_pte(unsigned long addr)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	BUG_ON(pgd_addr_invalid(addr));
+
+	pgd = swapper_pg_dir + pgd_index(addr);
+	pud = pud_offset(pgd, addr);
+	BUG_ON(!pud_present(*pud));
+	pmd = pmd_offset(pud, addr);
+	if (pmd_present(*pmd)) {
+		BUG_ON(pmd_huge_page(*pmd));
+	} else {
+		pte = __alloc_bootmem(L2_KERNEL_PGTABLE_SIZE,
+				      HV_PAGE_TABLE_ALIGN, 0);
+		pmd_populate_kernel(&init_mm, pmd, pte);
+	}
+}
+
+void __init setup_per_cpu_areas(void)
+{
+	struct page *pg;
+	unsigned long delta, pfn, lowmem_va;
+	unsigned long size = percpu_size();
+	char *ptr;
+	int rc, cpu, i;
+
+	rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE, pcpu_fc_alloc,
+				   pcpu_fc_free, pcpu_fc_populate_pte);
+	if (rc < 0)
+		panic("Cannot initialize percpu area (err=%d)", rc);
+
+	delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
+	for_each_possible_cpu(cpu) {
+		__per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
+
+		/* finv the copy out of cache so we can change homecache */
+		ptr = pcpu_base_addr + pcpu_unit_offsets[cpu];
+		__finv_buffer(ptr, size);
+		pfn = percpu_pfn[cpu];
+
+		/* Rewrite the page tables to cache on that cpu */
+		pg = pfn_to_page(pfn);
+		for (i = 0; i < size; i += PAGE_SIZE, ++pfn, ++pg) {
+
+			/* Update the vmalloc mapping and page home. */
+			pte_t *ptep =
+				virt_to_pte(NULL, (unsigned long)ptr + i);
+			pte_t pte = *ptep;
+			BUG_ON(pfn != pte_pfn(pte));
+			pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_TILE_L3);
+			pte = set_remote_cache_cpu(pte, cpu);
+			set_pte(ptep, pte);
+
+			/* Update the lowmem mapping for consistency. */
+			lowmem_va = (unsigned long)pfn_to_kaddr(pfn);
+			ptep = virt_to_pte(NULL, lowmem_va);
+			if (pte_huge(*ptep)) {
+				printk(KERN_DEBUG "early shatter of huge page"
+				       " at %#lx\n", lowmem_va);
+				shatter_pmd((pmd_t *)ptep);
+				ptep = virt_to_pte(NULL, lowmem_va);
+				BUG_ON(pte_huge(*ptep));
+			}
+			BUG_ON(pfn != pte_pfn(*ptep));
+			set_pte(ptep, pte);
+		}
+	}
+
+	/* Set our thread pointer appropriately. */
+	set_my_cpu_offset(__per_cpu_offset[smp_processor_id()]);
+
+	/* Make sure the finv's have completed. */
+	mb_incoherent();
+
+	/* Flush the TLB so we reference it properly from here on out. */
+	local_flush_tlb_all();
+}
+
+static struct resource data_resource = {
+	.name	= "Kernel data",
+	.start	= 0,
+	.end	= 0,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
+};
+
+static struct resource code_resource = {
+	.name	= "Kernel code",
+	.start	= 0,
+	.end	= 0,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
+};
+
+/*
+ * We reserve all resources above 4GB so that PCI won't try to put
+ * mappings above 4GB; the standard allows that for some devices but
+ * the probing code trunates values to 32 bits.
+ */
+#ifdef CONFIG_PCI
+static struct resource* __init
+insert_non_bus_resource(void)
+{
+	struct resource *res =
+		kzalloc(sizeof(struct resource), GFP_ATOMIC);
+	res->name = "Non-Bus Physical Address Space";
+	res->start = (1ULL << 32);
+	res->end = -1LL;
+	res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
+	if (insert_resource(&iomem_resource, res)) {
+		kfree(res);
+		return NULL;
+	}
+	return res;
+}
+#endif
+
+static struct resource* __init
+insert_ram_resource(u64 start_pfn, u64 end_pfn)
+{
+	struct resource *res =
+		kzalloc(sizeof(struct resource), GFP_ATOMIC);
+	res->name = "System RAM";
+	res->start = start_pfn << PAGE_SHIFT;
+	res->end = (end_pfn << PAGE_SHIFT) - 1;
+	res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
+	if (insert_resource(&iomem_resource, res)) {
+		kfree(res);
+		return NULL;
+	}
+	return res;
+}
+
+/*
+ * Request address space for all standard resources
+ *
+ * If the system includes PCI root complex drivers, we need to create
+ * a window just below 4GB where PCI BARs can be mapped.
+ */
+static int __init request_standard_resources(void)
+{
+	int i;
+	enum { CODE_DELTA = MEM_SV_INTRPT - PAGE_OFFSET };
+
+	iomem_resource.end = -1LL;
+#ifdef CONFIG_PCI
+	insert_non_bus_resource();
+#endif
+
+	for_each_online_node(i) {
+		u64 start_pfn = node_start_pfn[i];
+		u64 end_pfn = node_end_pfn[i];
+
+#ifdef CONFIG_PCI
+		if (start_pfn <= pci_reserve_start_pfn &&
+		    end_pfn > pci_reserve_start_pfn) {
+			if (end_pfn > pci_reserve_end_pfn)
+				insert_ram_resource(pci_reserve_end_pfn,
+						     end_pfn);
+			end_pfn = pci_reserve_start_pfn;
+		}
+#endif
+		insert_ram_resource(start_pfn, end_pfn);
+	}
+
+	code_resource.start = __pa(_text - CODE_DELTA);
+	code_resource.end = __pa(_etext - CODE_DELTA)-1;
+	data_resource.start = __pa(_sdata);
+	data_resource.end = __pa(_end)-1;
+
+	insert_resource(&iomem_resource, &code_resource);
+	insert_resource(&iomem_resource, &data_resource);
+
+#ifdef CONFIG_KEXEC
+	insert_resource(&iomem_resource, &crashk_res);
+#endif
+
+	return 0;
+}
+
+subsys_initcall(request_standard_resources);
diff --git a/arch/tile/kernel/signal.c b/arch/tile/kernel/signal.c
new file mode 100644
index 0000000..7ea85eb
--- /dev/null
+++ b/arch/tile/kernel/signal.c
@@ -0,0 +1,359 @@
+/*
+ * Copyright (C) 1991, 1992  Linus Torvalds
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/kernel.h>
+#include <linux/signal.h>
+#include <linux/errno.h>
+#include <linux/wait.h>
+#include <linux/unistd.h>
+#include <linux/stddef.h>
+#include <linux/personality.h>
+#include <linux/suspend.h>
+#include <linux/ptrace.h>
+#include <linux/elf.h>
+#include <linux/compat.h>
+#include <linux/syscalls.h>
+#include <linux/uaccess.h>
+#include <asm/processor.h>
+#include <asm/ucontext.h>
+#include <asm/sigframe.h>
+#include <arch/interrupts.h>
+
+#define DEBUG_SIG 0
+
+#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
+
+
+/* Caller before callee in this file; other callee is in assembler */
+void do_signal(struct pt_regs *regs);
+
+int _sys_sigaltstack(const stack_t __user *uss,
+		     stack_t __user *uoss, struct pt_regs *regs)
+{
+	return do_sigaltstack(uss, uoss, regs->sp);
+}
+
+
+/*
+ * Do a signal return; undo the signal stack.
+ */
+
+int restore_sigcontext(struct pt_regs *regs,
+		       struct sigcontext __user *sc, long *pr0)
+{
+	int err = 0;
+	int i;
+
+	/* Always make any pending restarted system calls return -EINTR */
+	current_thread_info()->restart_block.fn = do_no_restart_syscall;
+
+	for (i = 0; i < sizeof(struct pt_regs)/sizeof(long); ++i)
+		err |= __get_user(((long *)regs)[i],
+				  &((long *)(&sc->regs))[i]);
+
+	regs->faultnum = INT_SWINT_1_SIGRETURN;
+
+	err |= __get_user(*pr0, &sc->regs.regs[0]);
+	return err;
+}
+
+int _sys_rt_sigreturn(struct pt_regs *regs)
+{
+	struct rt_sigframe __user *frame =
+		(struct rt_sigframe __user *)(regs->sp);
+	sigset_t set;
+	long r0;
+
+	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
+		goto badframe;
+	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
+		goto badframe;
+
+	sigdelsetmask(&set, ~_BLOCKABLE);
+	spin_lock_irq(&current->sighand->siglock);
+	current->blocked = set;
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
+
+	if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &r0))
+		goto badframe;
+
+	if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->sp) == -EFAULT)
+		goto badframe;
+
+	return r0;
+
+badframe:
+	force_sig(SIGSEGV, current);
+	return 0;
+}
+
+/*
+ * Set up a signal frame.
+ */
+
+int setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs)
+{
+	int i, err = 0;
+
+	for (i = 0; i < sizeof(struct pt_regs)/sizeof(long); ++i)
+		err |= __put_user(((long *)regs)[i],
+				  &((long *)(&sc->regs))[i]);
+
+	return err;
+}
+
+/*
+ * Determine which stack to use..
+ */
+static inline void __user *get_sigframe(struct k_sigaction *ka,
+					struct pt_regs *regs,
+					size_t frame_size)
+{
+	unsigned long sp;
+
+	/* Default to using normal stack */
+	sp = regs->sp;
+
+	/*
+	 * If we are on the alternate signal stack and would overflow
+	 * it, don't.  Return an always-bogus address instead so we
+	 * will die with SIGSEGV.
+	 */
+	if (on_sig_stack(sp) && !likely(on_sig_stack(sp - frame_size)))
+		return (void __user *) -1L;
+
+	/* This is the X/Open sanctioned signal stack switching.  */
+	if (ka->sa.sa_flags & SA_ONSTACK) {
+		if (sas_ss_flags(sp) == 0)
+			sp = current->sas_ss_sp + current->sas_ss_size;
+	}
+
+	sp -= frame_size;
+	/*
+	 * Align the stack pointer according to the TILE ABI,
+	 * i.e. so that on function entry (sp & 15) == 0.
+	 */
+	sp &= -16UL;
+	return (void __user *) sp;
+}
+
+static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+			   sigset_t *set, struct pt_regs *regs)
+{
+	unsigned long restorer;
+	struct rt_sigframe __user *frame;
+	int err = 0;
+	int usig;
+
+	frame = get_sigframe(ka, regs, sizeof(*frame));
+
+	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
+		goto give_sigsegv;
+
+	usig = current_thread_info()->exec_domain
+		&& current_thread_info()->exec_domain->signal_invmap
+		&& sig < 32
+		? current_thread_info()->exec_domain->signal_invmap[sig]
+		: sig;
+
+	/* Always write at least the signal number for the stack backtracer. */
+	if (ka->sa.sa_flags & SA_SIGINFO) {
+		/* At sigreturn time, restore the callee-save registers too. */
+		err |= copy_siginfo_to_user(&frame->info, info);
+		regs->flags |= PT_FLAGS_RESTORE_REGS;
+	} else {
+		err |= __put_user(info->si_signo, &frame->info.si_signo);
+	}
+
+	/* Create the ucontext.  */
+	err |= __clear_user(&frame->save_area, sizeof(frame->save_area));
+	err |= __put_user(0, &frame->uc.uc_flags);
+	err |= __put_user(0, &frame->uc.uc_link);
+	err |= __put_user((void *)(current->sas_ss_sp),
+			  &frame->uc.uc_stack.ss_sp);
+	err |= __put_user(sas_ss_flags(regs->sp),
+			  &frame->uc.uc_stack.ss_flags);
+	err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
+	err |= setup_sigcontext(&frame->uc.uc_mcontext, regs);
+	err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
+	if (err)
+		goto give_sigsegv;
+
+	restorer = VDSO_BASE;
+	if (ka->sa.sa_flags & SA_RESTORER)
+		restorer = (unsigned long) ka->sa.sa_restorer;
+
+	/*
+	 * Set up registers for signal handler.
+	 * Registers that we don't modify keep the value they had from
+	 * user-space at the time we took the signal.
+	 */
+	regs->pc = (unsigned long) ka->sa.sa_handler;
+	regs->ex1 = PL_ICS_EX1(USER_PL, 1); /* set crit sec in handler */
+	regs->sp = (unsigned long) frame;
+	regs->lr = restorer;
+	regs->regs[0] = (unsigned long) usig;
+
+	if (ka->sa.sa_flags & SA_SIGINFO) {
+		/* Need extra arguments, so mark to restore caller-saves. */
+		regs->regs[1] = (unsigned long) &frame->info;
+		regs->regs[2] = (unsigned long) &frame->uc;
+		regs->flags |= PT_FLAGS_CALLER_SAVES;
+	}
+
+	/*
+	 * Notify any tracer that was single-stepping it.
+	 * The tracer may want to single-step inside the
+	 * handler too.
+	 */
+	if (test_thread_flag(TIF_SINGLESTEP))
+		ptrace_notify(SIGTRAP);
+
+	return 0;
+
+give_sigsegv:
+	force_sigsegv(sig, current);
+	return -EFAULT;
+}
+
+/*
+ * OK, we're invoking a handler
+ */
+
+static int handle_signal(unsigned long sig, siginfo_t *info,
+			 struct k_sigaction *ka, sigset_t *oldset,
+			 struct pt_regs *regs)
+{
+	int ret;
+
+
+	/* Are we from a system call? */
+	if (regs->faultnum == INT_SWINT_1) {
+		/* If so, check system call restarting.. */
+		switch (regs->regs[0]) {
+		case -ERESTART_RESTARTBLOCK:
+		case -ERESTARTNOHAND:
+			regs->regs[0] = -EINTR;
+			break;
+
+		case -ERESTARTSYS:
+			if (!(ka->sa.sa_flags & SA_RESTART)) {
+				regs->regs[0] = -EINTR;
+				break;
+			}
+			/* fallthrough */
+		case -ERESTARTNOINTR:
+			/* Reload caller-saves to restore r0..r5 and r10. */
+			regs->flags |= PT_FLAGS_CALLER_SAVES;
+			regs->regs[0] = regs->orig_r0;
+			regs->pc -= 8;
+		}
+	}
+
+	/* Set up the stack frame */
+#ifdef CONFIG_COMPAT
+	if (is_compat_task())
+		ret = compat_setup_rt_frame(sig, ka, info, oldset, regs);
+	else
+#endif
+		ret = setup_rt_frame(sig, ka, info, oldset, regs);
+	if (ret == 0) {
+		/* This code is only called from system calls or from
+		 * the work_pending path in the return-to-user code, and
+		 * either way we can re-enable interrupts unconditionally.
+		 */
+		spin_lock_irq(&current->sighand->siglock);
+		sigorsets(&current->blocked,
+			  &current->blocked, &ka->sa.sa_mask);
+		if (!(ka->sa.sa_flags & SA_NODEFER))
+			sigaddset(&current->blocked, sig);
+		recalc_sigpending();
+		spin_unlock_irq(&current->sighand->siglock);
+	}
+
+	return ret;
+}
+
+/*
+ * Note that 'init' is a special process: it doesn't get signals it doesn't
+ * want to handle. Thus you cannot kill init even with a SIGKILL even by
+ * mistake.
+ */
+void do_signal(struct pt_regs *regs)
+{
+	siginfo_t info;
+	int signr;
+	struct k_sigaction ka;
+	sigset_t *oldset;
+
+	/*
+	 * i386 will check if we're coming from kernel mode and bail out
+	 * here.  In my experience this just turns weird crashes into
+	 * weird spin-hangs.  But if we find a case where this seems
+	 * helpful, we can reinstate the check on "!user_mode(regs)".
+	 */
+
+	if (current_thread_info()->status & TS_RESTORE_SIGMASK)
+		oldset = &current->saved_sigmask;
+	else
+		oldset = &current->blocked;
+
+	signr = get_signal_to_deliver(&info, &ka, regs, NULL);
+	if (signr > 0) {
+		/* Whee! Actually deliver the signal.  */
+		if (handle_signal(signr, &info, &ka, oldset, regs) == 0) {
+			/*
+			 * A signal was successfully delivered; the saved
+			 * sigmask will have been stored in the signal frame,
+			 * and will be restored by sigreturn, so we can simply
+			 * clear the TS_RESTORE_SIGMASK flag.
+			 */
+			current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
+		}
+
+		return;
+	}
+
+	/* Did we come from a system call? */
+	if (regs->faultnum == INT_SWINT_1) {
+		/* Restart the system call - no handlers present */
+		switch (regs->regs[0]) {
+		case -ERESTARTNOHAND:
+		case -ERESTARTSYS:
+		case -ERESTARTNOINTR:
+			regs->flags |= PT_FLAGS_CALLER_SAVES;
+			regs->regs[0] = regs->orig_r0;
+			regs->pc -= 8;
+			break;
+
+		case -ERESTART_RESTARTBLOCK:
+			regs->flags |= PT_FLAGS_CALLER_SAVES;
+			regs->regs[TREG_SYSCALL_NR] = __NR_restart_syscall;
+			regs->pc -= 8;
+			break;
+		}
+	}
+
+	/* If there's no signal to deliver, just put the saved sigmask back. */
+	if (current_thread_info()->status & TS_RESTORE_SIGMASK) {
+		current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
+		sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
+	}
+}
diff --git a/arch/tile/kernel/single_step.c b/arch/tile/kernel/single_step.c
new file mode 100644
index 0000000..266aae1
--- /dev/null
+++ b/arch/tile/kernel/single_step.c
@@ -0,0 +1,656 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ * A code-rewriter that enables instruction single-stepping.
+ * Derived from iLib's single-stepping code.
+ */
+
+#ifndef __tilegx__   /* No support for single-step yet. */
+
+/* These functions are only used on the TILE platform */
+#include <linux/slab.h>
+#include <linux/thread_info.h>
+#include <linux/uaccess.h>
+#include <linux/mman.h>
+#include <linux/types.h>
+#include <asm/cacheflush.h>
+#include <asm/opcode-tile.h>
+#include <asm/opcode_constants.h>
+#include <arch/abi.h>
+
+#define signExtend17(val) sign_extend((val), 17)
+#define TILE_X1_MASK (0xffffffffULL << 31)
+
+int unaligned_printk;
+
+static int __init setup_unaligned_printk(char *str)
+{
+	long val;
+	if (strict_strtol(str, 0, &val) != 0)
+		return 0;
+	unaligned_printk = val;
+	printk("Printk for each unaligned data accesses is %s\n",
+	       unaligned_printk ? "enabled" : "disabled");
+	return 1;
+}
+__setup("unaligned_printk=", setup_unaligned_printk);
+
+unsigned int unaligned_fixup_count;
+
+enum mem_op {
+	MEMOP_NONE,
+	MEMOP_LOAD,
+	MEMOP_STORE,
+	MEMOP_LOAD_POSTINCR,
+	MEMOP_STORE_POSTINCR
+};
+
+static inline tile_bundle_bits set_BrOff_X1(tile_bundle_bits n, int32_t offset)
+{
+	tile_bundle_bits result;
+
+	/* mask out the old offset */
+	tile_bundle_bits mask = create_BrOff_X1(-1);
+	result = n & (~mask);
+
+	/* or in the new offset */
+	result |= create_BrOff_X1(offset);
+
+	return result;
+}
+
+static inline tile_bundle_bits move_X1(tile_bundle_bits n, int dest, int src)
+{
+	tile_bundle_bits result;
+	tile_bundle_bits op;
+
+	result = n & (~TILE_X1_MASK);
+
+	op = create_Opcode_X1(SPECIAL_0_OPCODE_X1) |
+		create_RRROpcodeExtension_X1(OR_SPECIAL_0_OPCODE_X1) |
+		create_Dest_X1(dest) |
+		create_SrcB_X1(TREG_ZERO) |
+		create_SrcA_X1(src) ;
+
+	result |= op;
+	return result;
+}
+
+static inline tile_bundle_bits nop_X1(tile_bundle_bits n)
+{
+	return move_X1(n, TREG_ZERO, TREG_ZERO);
+}
+
+static inline tile_bundle_bits addi_X1(
+	tile_bundle_bits n, int dest, int src, int imm)
+{
+	n &= ~TILE_X1_MASK;
+
+	n |=  (create_SrcA_X1(src) |
+	       create_Dest_X1(dest) |
+	       create_Imm8_X1(imm) |
+	       create_S_X1(0) |
+	       create_Opcode_X1(IMM_0_OPCODE_X1) |
+	       create_ImmOpcodeExtension_X1(ADDI_IMM_0_OPCODE_X1));
+
+	return n;
+}
+
+static tile_bundle_bits rewrite_load_store_unaligned(
+	struct single_step_state *state,
+	tile_bundle_bits bundle,
+	struct pt_regs *regs,
+	enum mem_op mem_op,
+	int size, int sign_ext)
+{
+	unsigned char *addr;
+	int val_reg, addr_reg, err, val;
+
+	/* Get address and value registers */
+	if (bundle & TILE_BUNDLE_Y_ENCODING_MASK) {
+		addr_reg = get_SrcA_Y2(bundle);
+		val_reg = get_SrcBDest_Y2(bundle);
+	} else if (mem_op == MEMOP_LOAD || mem_op == MEMOP_LOAD_POSTINCR) {
+		addr_reg = get_SrcA_X1(bundle);
+		val_reg  = get_Dest_X1(bundle);
+	} else {
+		addr_reg = get_SrcA_X1(bundle);
+		val_reg  = get_SrcB_X1(bundle);
+	}
+
+	/*
+	 * If registers are not GPRs, don't try to handle it.
+	 *
+	 * FIXME: we could handle non-GPR loads by getting the real value
+	 * from memory, writing it to the single step buffer, using a
+	 * temp_reg to hold a pointer to that memory, then executing that
+	 * instruction and resetting temp_reg.  For non-GPR stores, it's a
+	 * little trickier; we could use the single step buffer for that
+	 * too, but we'd have to add some more state bits so that we could
+	 * call back in here to copy that value to the real target.  For
+	 * now, we just handle the simple case.
+	 */
+	if ((val_reg >= PTREGS_NR_GPRS &&
+	     (val_reg != TREG_ZERO ||
+	      mem_op == MEMOP_LOAD ||
+	      mem_op == MEMOP_LOAD_POSTINCR)) ||
+	    addr_reg >= PTREGS_NR_GPRS)
+		return bundle;
+
+	/* If it's aligned, don't handle it specially */
+	addr = (void *)regs->regs[addr_reg];
+	if (((unsigned long)addr % size) == 0)
+		return bundle;
+
+#ifndef __LITTLE_ENDIAN
+# error We assume little-endian representation with copy_xx_user size 2 here
+#endif
+	/* Handle unaligned load/store */
+	if (mem_op == MEMOP_LOAD || mem_op == MEMOP_LOAD_POSTINCR) {
+		unsigned short val_16;
+		switch (size) {
+		case 2:
+			err = copy_from_user(&val_16, addr, sizeof(val_16));
+			val = sign_ext ? ((short)val_16) : val_16;
+			break;
+		case 4:
+			err = copy_from_user(&val, addr, sizeof(val));
+			break;
+		default:
+			BUG();
+		}
+		if (err == 0) {
+			state->update_reg = val_reg;
+			state->update_value = val;
+			state->update = 1;
+		}
+	} else {
+		val = (val_reg == TREG_ZERO) ? 0 : regs->regs[val_reg];
+		err = copy_to_user(addr, &val, size);
+	}
+
+	if (err) {
+		siginfo_t info = {
+			.si_signo = SIGSEGV,
+			.si_code = SEGV_MAPERR,
+			.si_addr = (void __user *)addr
+		};
+		force_sig_info(info.si_signo, &info, current);
+		return (tile_bundle_bits) 0;
+	}
+
+	if (unaligned_fixup == 0) {
+		siginfo_t info = {
+			.si_signo = SIGBUS,
+			.si_code = BUS_ADRALN,
+			.si_addr = (void __user *)addr
+		};
+		force_sig_info(info.si_signo, &info, current);
+		return (tile_bundle_bits) 0;
+	}
+
+	if (unaligned_printk || unaligned_fixup_count == 0) {
+		printk("Process %d/%s: PC %#lx: Fixup of"
+		       " unaligned %s at %#lx.\n",
+		       current->pid, current->comm, regs->pc,
+		       (mem_op == MEMOP_LOAD || mem_op == MEMOP_LOAD_POSTINCR) ?
+			 "load" : "store",
+		       (unsigned long)addr);
+		if (!unaligned_printk) {
+			printk("\n"
+"Unaligned fixups in the kernel will slow your application considerably.\n"
+"You can find them by writing \"1\" to /proc/sys/tile/unaligned_fixup/printk,\n"
+"which requests the kernel show all unaligned fixups, or writing a \"0\"\n"
+"to /proc/sys/tile/unaligned_fixup/enabled, in which case each unaligned\n"
+"access will become a SIGBUS you can debug. No further warnings will be\n"
+"shown so as to avoid additional slowdown, but you can track the number\n"
+"of fixups performed via /proc/sys/tile/unaligned_fixup/count.\n"
+"Use the tile-addr2line command (see \"info addr2line\") to decode PCs.\n"
+				"\n");
+		}
+	}
+	++unaligned_fixup_count;
+
+	if (bundle & TILE_BUNDLE_Y_ENCODING_MASK) {
+		/* Convert the Y2 instruction to a prefetch. */
+		bundle &= ~(create_SrcBDest_Y2(-1) |
+			    create_Opcode_Y2(-1));
+		bundle |= (create_SrcBDest_Y2(TREG_ZERO) |
+			   create_Opcode_Y2(LW_OPCODE_Y2));
+	/* Replace the load postincr with an addi */
+	} else if (mem_op == MEMOP_LOAD_POSTINCR) {
+		bundle = addi_X1(bundle, addr_reg, addr_reg,
+				 get_Imm8_X1(bundle));
+	/* Replace the store postincr with an addi */
+	} else if (mem_op == MEMOP_STORE_POSTINCR) {
+		bundle = addi_X1(bundle, addr_reg, addr_reg,
+				 get_Dest_Imm8_X1(bundle));
+	} else {
+		/* Convert the X1 instruction to a nop. */
+		bundle &= ~(create_Opcode_X1(-1) |
+			    create_UnShOpcodeExtension_X1(-1) |
+			    create_UnOpcodeExtension_X1(-1));
+		bundle |= (create_Opcode_X1(SHUN_0_OPCODE_X1) |
+			   create_UnShOpcodeExtension_X1(
+				   UN_0_SHUN_0_OPCODE_X1) |
+			   create_UnOpcodeExtension_X1(
+				   NOP_UN_0_SHUN_0_OPCODE_X1));
+	}
+
+	return bundle;
+}
+
+/**
+ * single_step_once() - entry point when single stepping has been triggered.
+ * @regs: The machine register state
+ *
+ *  When we arrive at this routine via a trampoline, the single step
+ *  engine copies the executing bundle to the single step buffer.
+ *  If the instruction is a condition branch, then the target is
+ *  reset to one past the next instruction. If the instruction
+ *  sets the lr, then that is noted. If the instruction is a jump
+ *  or call, then the new target pc is preserved and the current
+ *  bundle instruction set to null.
+ *
+ *  The necessary post-single-step rewriting information is stored in
+ *  single_step_state->  We use data segment values because the
+ *  stack will be rewound when we run the rewritten single-stepped
+ *  instruction.
+ */
+void single_step_once(struct pt_regs *regs)
+{
+	extern tile_bundle_bits __single_step_ill_insn;
+	extern tile_bundle_bits __single_step_j_insn;
+	extern tile_bundle_bits __single_step_addli_insn;
+	extern tile_bundle_bits __single_step_auli_insn;
+	struct thread_info *info = (void *)current_thread_info();
+	struct single_step_state *state = info->step_state;
+	int is_single_step = test_ti_thread_flag(info, TIF_SINGLESTEP);
+	tile_bundle_bits *buffer, *pc;
+	tile_bundle_bits bundle;
+	int temp_reg;
+	int target_reg = TREG_LR;
+	int err;
+	enum mem_op mem_op = MEMOP_NONE;
+	int size = 0, sign_ext = 0;  /* happy compiler */
+
+	asm(
+"    .pushsection .rodata.single_step\n"
+"    .align 8\n"
+"    .globl    __single_step_ill_insn\n"
+"__single_step_ill_insn:\n"
+"    ill\n"
+"    .globl    __single_step_addli_insn\n"
+"__single_step_addli_insn:\n"
+"    { nop; addli r0, zero, 0 }\n"
+"    .globl    __single_step_auli_insn\n"
+"__single_step_auli_insn:\n"
+"    { nop; auli r0, r0, 0 }\n"
+"    .globl    __single_step_j_insn\n"
+"__single_step_j_insn:\n"
+"    j .\n"
+"    .popsection\n"
+	);
+
+	if (state == NULL) {
+		/* allocate a page of writable, executable memory */
+		state = kmalloc(sizeof(struct single_step_state), GFP_KERNEL);
+		if (state == NULL) {
+			printk("Out of kernel memory trying to single-step\n");
+			return;
+		}
+
+		/* allocate a cache line of writable, executable memory */
+		down_write(&current->mm->mmap_sem);
+		buffer = (void *) do_mmap(0, 0, 64,
+					  PROT_EXEC | PROT_READ | PROT_WRITE,
+					  MAP_PRIVATE | MAP_ANONYMOUS,
+					  0);
+		up_write(&current->mm->mmap_sem);
+
+		if ((int)buffer < 0 && (int)buffer > -PAGE_SIZE) {
+			kfree(state);
+			printk("Out of kernel pages trying to single-step\n");
+			return;
+		}
+
+		state->buffer = buffer;
+		state->is_enabled = 0;
+
+		info->step_state = state;
+
+		/* Validate our stored instruction patterns */
+		BUG_ON(get_Opcode_X1(__single_step_addli_insn) !=
+		       ADDLI_OPCODE_X1);
+		BUG_ON(get_Opcode_X1(__single_step_auli_insn) !=
+		       AULI_OPCODE_X1);
+		BUG_ON(get_SrcA_X1(__single_step_addli_insn) != TREG_ZERO);
+		BUG_ON(get_Dest_X1(__single_step_addli_insn) != 0);
+		BUG_ON(get_JOffLong_X1(__single_step_j_insn) != 0);
+	}
+
+	/*
+	 * If we are returning from a syscall, we still haven't hit the
+	 * "ill" for the swint1 instruction.  So back the PC up to be
+	 * pointing at the swint1, but we'll actually return directly
+	 * back to the "ill" so we come back in via SIGILL as if we
+	 * had "executed" the swint1 without ever being in kernel space.
+	 */
+	if (regs->faultnum == INT_SWINT_1)
+		regs->pc -= 8;
+
+	pc = (tile_bundle_bits *)(regs->pc);
+	bundle = pc[0];
+
+	/* We'll follow the instruction with 2 ill op bundles */
+	state->orig_pc = (unsigned long) pc;
+	state->next_pc = (unsigned long)(pc + 1);
+	state->branch_next_pc = 0;
+	state->update = 0;
+
+	if (!(bundle & TILE_BUNDLE_Y_ENCODING_MASK)) {
+		/* two wide, check for control flow */
+		int opcode = get_Opcode_X1(bundle);
+
+		switch (opcode) {
+		/* branches */
+		case BRANCH_OPCODE_X1:
+		{
+			int32_t offset = signExtend17(get_BrOff_X1(bundle));
+
+			/*
+			 * For branches, we use a rewriting trick to let the
+			 * hardware evaluate whether the branch is taken or
+			 * untaken.  We record the target offset and then
+			 * rewrite the branch instruction to target 1 insn
+			 * ahead if the branch is taken.  We then follow the
+			 * rewritten branch with two bundles, each containing
+			 * an "ill" instruction. The supervisor examines the
+			 * pc after the single step code is executed, and if
+			 * the pc is the first ill instruction, then the
+			 * branch (if any) was not taken.  If the pc is the
+			 * second ill instruction, then the branch was
+			 * taken. The new pc is computed for these cases, and
+			 * inserted into the registers for the thread.  If
+			 * the pc is the start of the single step code, then
+			 * an exception or interrupt was taken before the
+			 * code started processing, and the same "original"
+			 * pc is restored.  This change, different from the
+			 * original implementation, has the advantage of
+			 * executing a single user instruction.
+			 */
+			state->branch_next_pc = (unsigned long)(pc + offset);
+
+			/* rewrite branch offset to go forward one bundle */
+			bundle = set_BrOff_X1(bundle, 2);
+		}
+		break;
+
+		/* jumps */
+		case JALB_OPCODE_X1:
+		case JALF_OPCODE_X1:
+			state->update = 1;
+			state->next_pc =
+				(unsigned long) (pc + get_JOffLong_X1(bundle));
+			break;
+
+		case JB_OPCODE_X1:
+		case JF_OPCODE_X1:
+			state->next_pc =
+				(unsigned long) (pc + get_JOffLong_X1(bundle));
+			bundle = nop_X1(bundle);
+			break;
+
+		case SPECIAL_0_OPCODE_X1:
+			switch (get_RRROpcodeExtension_X1(bundle)) {
+			/* jump-register */
+			case JALRP_SPECIAL_0_OPCODE_X1:
+			case JALR_SPECIAL_0_OPCODE_X1:
+				state->update = 1;
+				state->next_pc =
+					regs->regs[get_SrcA_X1(bundle)];
+				break;
+
+			case JRP_SPECIAL_0_OPCODE_X1:
+			case JR_SPECIAL_0_OPCODE_X1:
+				state->next_pc =
+					regs->regs[get_SrcA_X1(bundle)];
+				bundle = nop_X1(bundle);
+				break;
+
+			case LNK_SPECIAL_0_OPCODE_X1:
+				state->update = 1;
+				target_reg = get_Dest_X1(bundle);
+				break;
+
+			/* stores */
+			case SH_SPECIAL_0_OPCODE_X1:
+				mem_op = MEMOP_STORE;
+				size = 2;
+				break;
+
+			case SW_SPECIAL_0_OPCODE_X1:
+				mem_op = MEMOP_STORE;
+				size = 4;
+				break;
+			}
+			break;
+
+		/* loads and iret */
+		case SHUN_0_OPCODE_X1:
+			if (get_UnShOpcodeExtension_X1(bundle) ==
+			    UN_0_SHUN_0_OPCODE_X1) {
+				switch (get_UnOpcodeExtension_X1(bundle)) {
+				case LH_UN_0_SHUN_0_OPCODE_X1:
+					mem_op = MEMOP_LOAD;
+					size = 2;
+					sign_ext = 1;
+					break;
+
+				case LH_U_UN_0_SHUN_0_OPCODE_X1:
+					mem_op = MEMOP_LOAD;
+					size = 2;
+					sign_ext = 0;
+					break;
+
+				case LW_UN_0_SHUN_0_OPCODE_X1:
+					mem_op = MEMOP_LOAD;
+					size = 4;
+					break;
+
+				case IRET_UN_0_SHUN_0_OPCODE_X1:
+				{
+					unsigned long ex0_0 = __insn_mfspr(
+						SPR_EX_CONTEXT_0_0);
+					unsigned long ex0_1 = __insn_mfspr(
+						SPR_EX_CONTEXT_0_1);
+					/*
+					 * Special-case it if we're iret'ing
+					 * to PL0 again.  Otherwise just let
+					 * it run and it will generate SIGILL.
+					 */
+					if (EX1_PL(ex0_1) == USER_PL) {
+						state->next_pc = ex0_0;
+						regs->ex1 = ex0_1;
+						bundle = nop_X1(bundle);
+					}
+				}
+				}
+			}
+			break;
+
+#if CHIP_HAS_WH64()
+		/* postincrement operations */
+		case IMM_0_OPCODE_X1:
+			switch (get_ImmOpcodeExtension_X1(bundle)) {
+			case LWADD_IMM_0_OPCODE_X1:
+				mem_op = MEMOP_LOAD_POSTINCR;
+				size = 4;
+				break;
+
+			case LHADD_IMM_0_OPCODE_X1:
+				mem_op = MEMOP_LOAD_POSTINCR;
+				size = 2;
+				sign_ext = 1;
+				break;
+
+			case LHADD_U_IMM_0_OPCODE_X1:
+				mem_op = MEMOP_LOAD_POSTINCR;
+				size = 2;
+				sign_ext = 0;
+				break;
+
+			case SWADD_IMM_0_OPCODE_X1:
+				mem_op = MEMOP_STORE_POSTINCR;
+				size = 4;
+				break;
+
+			case SHADD_IMM_0_OPCODE_X1:
+				mem_op = MEMOP_STORE_POSTINCR;
+				size = 2;
+				break;
+
+			default:
+				break;
+			}
+			break;
+#endif /* CHIP_HAS_WH64() */
+		}
+
+		if (state->update) {
+			/*
+			 * Get an available register.  We start with a
+			 * bitmask with 1's for available registers.
+			 * We truncate to the low 32 registers since
+			 * we are guaranteed to have set bits in the
+			 * low 32 bits, then use ctz to pick the first.
+			 */
+			u32 mask = (u32) ~((1ULL << get_Dest_X0(bundle)) |
+					   (1ULL << get_SrcA_X0(bundle)) |
+					   (1ULL << get_SrcB_X0(bundle)) |
+					   (1ULL << target_reg));
+			temp_reg = __builtin_ctz(mask);
+			state->update_reg = temp_reg;
+			state->update_value = regs->regs[temp_reg];
+			regs->regs[temp_reg] = (unsigned long) (pc+1);
+			regs->flags |= PT_FLAGS_RESTORE_REGS;
+			bundle = move_X1(bundle, target_reg, temp_reg);
+		}
+	} else {
+		int opcode = get_Opcode_Y2(bundle);
+
+		switch (opcode) {
+		/* loads */
+		case LH_OPCODE_Y2:
+			mem_op = MEMOP_LOAD;
+			size = 2;
+			sign_ext = 1;
+			break;
+
+		case LH_U_OPCODE_Y2:
+			mem_op = MEMOP_LOAD;
+			size = 2;
+			sign_ext = 0;
+			break;
+
+		case LW_OPCODE_Y2:
+			mem_op = MEMOP_LOAD;
+			size = 4;
+			break;
+
+		/* stores */
+		case SH_OPCODE_Y2:
+			mem_op = MEMOP_STORE;
+			size = 2;
+			break;
+
+		case SW_OPCODE_Y2:
+			mem_op = MEMOP_STORE;
+			size = 4;
+			break;
+		}
+	}
+
+	/*
+	 * Check if we need to rewrite an unaligned load/store.
+	 * Returning zero is a special value meaning we need to SIGSEGV.
+	 */
+	if (mem_op != MEMOP_NONE && unaligned_fixup >= 0) {
+		bundle = rewrite_load_store_unaligned(state, bundle, regs,
+						      mem_op, size, sign_ext);
+		if (bundle == 0)
+			return;
+	}
+
+	/* write the bundle to our execution area */
+	buffer = state->buffer;
+	err = __put_user(bundle, buffer++);
+
+	/*
+	 * If we're really single-stepping, we take an INT_ILL after.
+	 * If we're just handling an unaligned access, we can just
+	 * jump directly back to where we were in user code.
+	 */
+	if (is_single_step) {
+		err |= __put_user(__single_step_ill_insn, buffer++);
+		err |= __put_user(__single_step_ill_insn, buffer++);
+	} else {
+		long delta;
+
+		if (state->update) {
+			/* We have some state to update; do it inline */
+			int ha16;
+			bundle = __single_step_addli_insn;
+			bundle |= create_Dest_X1(state->update_reg);
+			bundle |= create_Imm16_X1(state->update_value);
+			err |= __put_user(bundle, buffer++);
+			bundle = __single_step_auli_insn;
+			bundle |= create_Dest_X1(state->update_reg);
+			bundle |= create_SrcA_X1(state->update_reg);
+			ha16 = (state->update_value + 0x8000) >> 16;
+			bundle |= create_Imm16_X1(ha16);
+			err |= __put_user(bundle, buffer++);
+			state->update = 0;
+		}
+
+		/* End with a jump back to the next instruction */
+		delta = ((regs->pc + TILE_BUNDLE_SIZE_IN_BYTES) -
+			(unsigned long)buffer) >>
+			TILE_LOG2_BUNDLE_ALIGNMENT_IN_BYTES;
+		bundle = __single_step_j_insn;
+		bundle |= create_JOffLong_X1(delta);
+		err |= __put_user(bundle, buffer++);
+	}
+
+	if (err) {
+		printk("Fault when writing to single-step buffer\n");
+		return;
+	}
+
+	/*
+	 * Flush the buffer.
+	 * We do a local flush only, since this is a thread-specific buffer.
+	 */
+	__flush_icache_range((unsigned long) state->buffer,
+			     (unsigned long) buffer);
+
+	/* Indicate enabled */
+	state->is_enabled = is_single_step;
+	regs->pc = (unsigned long) state->buffer;
+
+	/* Fault immediately if we are coming back from a syscall. */
+	if (regs->faultnum == INT_SWINT_1)
+		regs->pc += 8;
+}
+
+#endif /* !__tilegx__ */
diff --git a/arch/tile/kernel/smp.c b/arch/tile/kernel/smp.c
new file mode 100644
index 0000000..782c1bf
--- /dev/null
+++ b/arch/tile/kernel/smp.c
@@ -0,0 +1,202 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ * TILE SMP support routines.
+ */
+
+#include <linux/smp.h>
+#include <linux/irq.h>
+#include <asm/cacheflush.h>
+
+HV_Topology smp_topology __write_once;
+
+
+/*
+ * Top-level send_IPI*() functions to send messages to other cpus.
+ */
+
+/* Set by smp_send_stop() to avoid recursive panics. */
+static int stopping_cpus;
+
+void send_IPI_single(int cpu, int tag)
+{
+	HV_Recipient recip = {
+		.y = cpu / smp_width,
+		.x = cpu % smp_width,
+		.state = HV_TO_BE_SENT
+	};
+	int rc = hv_send_message(&recip, 1, (HV_VirtAddr)&tag, sizeof(tag));
+	BUG_ON(rc <= 0);
+}
+
+void send_IPI_many(const struct cpumask *mask, int tag)
+{
+	HV_Recipient recip[NR_CPUS];
+	int cpu, sent;
+	int nrecip = 0;
+	int my_cpu = smp_processor_id();
+	for_each_cpu(cpu, mask) {
+		HV_Recipient *r;
+		BUG_ON(cpu == my_cpu);
+		r = &recip[nrecip++];
+		r->y = cpu / smp_width;
+		r->x = cpu % smp_width;
+		r->state = HV_TO_BE_SENT;
+	}
+	sent = 0;
+	while (sent < nrecip) {
+		int rc = hv_send_message(recip, nrecip,
+					 (HV_VirtAddr)&tag, sizeof(tag));
+		if (rc <= 0) {
+			if (!stopping_cpus)  /* avoid recursive panic */
+				panic("hv_send_message returned %d", rc);
+			break;
+		}
+		sent += rc;
+	}
+}
+
+void send_IPI_allbutself(int tag)
+{
+	struct cpumask mask;
+	cpumask_copy(&mask, cpu_online_mask);
+	cpumask_clear_cpu(smp_processor_id(), &mask);
+	send_IPI_many(&mask, tag);
+}
+
+
+/*
+ * Provide smp_call_function_mask, but also run function locally
+ * if specified in the mask.
+ */
+void on_each_cpu_mask(const struct cpumask *mask, void (*func)(void *),
+		      void *info, bool wait)
+{
+	int cpu = get_cpu();
+	smp_call_function_many(mask, func, info, wait);
+	if (cpumask_test_cpu(cpu, mask)) {
+		local_irq_disable();
+		func(info);
+		local_irq_enable();
+	}
+	put_cpu();
+}
+
+
+/*
+ * Functions related to starting/stopping cpus.
+ */
+
+/* Handler to start the current cpu. */
+static void smp_start_cpu_interrupt(void)
+{
+	extern unsigned long start_cpu_function_addr;
+	get_irq_regs()->pc = start_cpu_function_addr;
+}
+
+/* Handler to stop the current cpu. */
+static void smp_stop_cpu_interrupt(void)
+{
+	set_cpu_online(smp_processor_id(), 0);
+	raw_local_irq_disable_all();
+	for (;;)
+		asm("nap");
+}
+
+/* This function calls the 'stop' function on all other CPUs in the system. */
+void smp_send_stop(void)
+{
+	stopping_cpus = 1;
+	send_IPI_allbutself(MSG_TAG_STOP_CPU);
+}
+
+
+/*
+ * Dispatch code called from hv_message_intr() for HV_MSG_TILE hv messages.
+ */
+void evaluate_message(int tag)
+{
+	switch (tag) {
+	case MSG_TAG_START_CPU: /* Start up a cpu */
+		smp_start_cpu_interrupt();
+		break;
+
+	case MSG_TAG_STOP_CPU: /* Sent to shut down slave CPU's */
+		smp_stop_cpu_interrupt();
+		break;
+
+	case MSG_TAG_CALL_FUNCTION_MANY: /* Call function on cpumask */
+		generic_smp_call_function_interrupt();
+		break;
+
+	case MSG_TAG_CALL_FUNCTION_SINGLE: /* Call function on one other CPU */
+		generic_smp_call_function_single_interrupt();
+		break;
+
+	default:
+		panic("Unknown IPI message tag %d", tag);
+		break;
+	}
+}
+
+
+/*
+ * flush_icache_range() code uses smp_call_function().
+ */
+
+struct ipi_flush {
+	unsigned long start;
+	unsigned long end;
+};
+
+static void ipi_flush_icache_range(void *info)
+{
+	struct ipi_flush *flush = (struct ipi_flush *) info;
+	__flush_icache_range(flush->start, flush->end);
+}
+
+void flush_icache_range(unsigned long start, unsigned long end)
+{
+	struct ipi_flush flush = { start, end };
+	preempt_disable();
+	on_each_cpu(ipi_flush_icache_range, &flush, 1);
+	preempt_enable();
+}
+
+
+/*
+ * The smp_send_reschedule() path does not use the hv_message_intr()
+ * path but instead the faster tile_dev_intr() path for interrupts.
+ */
+
+irqreturn_t handle_reschedule_ipi(int irq, void *token)
+{
+	/*
+	 * Nothing to do here; when we return from interrupt, the
+	 * rescheduling will occur there. But do bump the interrupt
+	 * profiler count in the meantime.
+	 */
+	__get_cpu_var(irq_stat).irq_resched_count++;
+
+	return IRQ_HANDLED;
+}
+
+void smp_send_reschedule(int cpu)
+{
+	HV_Coord coord;
+
+	WARN_ON(cpu_is_offline(cpu));
+	coord.y = cpu / smp_width;
+	coord.x = cpu % smp_width;
+	hv_trigger_ipi(coord, IRQ_RESCHEDULE);
+}
diff --git a/arch/tile/kernel/smpboot.c b/arch/tile/kernel/smpboot.c
new file mode 100644
index 0000000..aa3aafd
--- /dev/null
+++ b/arch/tile/kernel/smpboot.c
@@ -0,0 +1,293 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/kernel_stat.h>
+#include <linux/smp_lock.h>
+#include <linux/bootmem.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/percpu.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <asm/mmu_context.h>
+#include <asm/tlbflush.h>
+#include <asm/sections.h>
+
+/*
+ * This assembly function is provided in entry.S.
+ * When called, it loops on a nap instruction forever.
+ * FIXME: should be in a header somewhere.
+ */
+extern void smp_nap(void);
+
+/* State of each CPU. */
+DEFINE_PER_CPU(int, cpu_state) = { 0 };
+
+/* The messaging code jumps to this pointer during boot-up */
+unsigned long start_cpu_function_addr;
+
+/* Called very early during startup to mark boot cpu as online */
+void __init smp_prepare_boot_cpu(void)
+{
+	int cpu = smp_processor_id();
+	set_cpu_online(cpu, 1);
+	set_cpu_present(cpu, 1);
+	__get_cpu_var(cpu_state) = CPU_ONLINE;
+
+	init_messaging();
+}
+
+static void start_secondary(void);
+
+/*
+ * Called at the top of init() to launch all the other CPUs.
+ * They run free to complete their initialization and then wait
+ * until they get an IPI from the boot cpu to come online.
+ */
+void __init smp_prepare_cpus(unsigned int max_cpus)
+{
+	long rc;
+	int cpu, cpu_count;
+	int boot_cpu = smp_processor_id();
+
+	current_thread_info()->cpu = boot_cpu;
+
+	/*
+	 * Pin this task to the boot CPU while we bring up the others,
+	 * just to make sure we don't uselessly migrate as they come up.
+	 */
+	rc = sched_setaffinity(current->pid, cpumask_of(boot_cpu));
+	if (rc != 0)
+		printk("Couldn't set init affinity to boot cpu (%ld)\n", rc);
+
+	/* Print information about disabled and dataplane cpus. */
+	print_disabled_cpus();
+
+	/*
+	 * Tell the messaging subsystem how to respond to the
+	 * startup message.  We use a level of indirection to avoid
+	 * confusing the linker with the fact that the messaging
+	 * subsystem is calling __init code.
+	 */
+	start_cpu_function_addr = (unsigned long) &online_secondary;
+
+	/* Set up thread context for all new processors. */
+	cpu_count = 1;
+	for (cpu = 0; cpu < NR_CPUS; ++cpu)	{
+		struct task_struct *idle;
+
+		if (cpu == boot_cpu)
+			continue;
+
+		if (!cpu_possible(cpu)) {
+			/*
+			 * Make this processor do nothing on boot.
+			 * Note that we don't give the boot_pc function
+			 * a stack, so it has to be assembly code.
+			 */
+			per_cpu(boot_sp, cpu) = 0;
+			per_cpu(boot_pc, cpu) = (unsigned long) smp_nap;
+			continue;
+		}
+
+		/* Create a new idle thread to run start_secondary() */
+		idle = fork_idle(cpu);
+		if (IS_ERR(idle))
+			panic("failed fork for CPU %d", cpu);
+		idle->thread.pc = (unsigned long) start_secondary;
+
+		/* Make this thread the boot thread for this processor */
+		per_cpu(boot_sp, cpu) = task_ksp0(idle);
+		per_cpu(boot_pc, cpu) = idle->thread.pc;
+
+		++cpu_count;
+	}
+	BUG_ON(cpu_count > (max_cpus ? max_cpus : 1));
+
+	/* Fire up the other tiles, if any */
+	init_cpu_present(cpu_possible_mask);
+	if (cpumask_weight(cpu_present_mask) > 1) {
+		mb();  /* make sure all data is visible to new processors */
+		hv_start_all_tiles();
+	}
+}
+
+static __initdata struct cpumask init_affinity;
+
+static __init int reset_init_affinity(void)
+{
+	long rc = sched_setaffinity(current->pid, &init_affinity);
+	if (rc != 0)
+		printk(KERN_WARNING "couldn't reset init affinity (%ld)\n",
+		       rc);
+	return 0;
+}
+late_initcall(reset_init_affinity);
+
+struct cpumask cpu_started __cpuinitdata;
+
+/*
+ * Activate a secondary processor.  Very minimal; don't add anything
+ * to this path without knowing what you're doing, since SMP booting
+ * is pretty fragile.
+ */
+static void __cpuinit start_secondary(void)
+{
+	int cpuid = smp_processor_id();
+
+	/* Set our thread pointer appropriately. */
+	set_my_cpu_offset(__per_cpu_offset[cpuid]);
+
+	preempt_disable();
+
+	/*
+	 * In large machines even this will slow us down, since we
+	 * will be contending for for the printk spinlock.
+	 */
+	/* printk(KERN_DEBUG "Initializing CPU#%d\n", cpuid); */
+
+	/* Initialize the current asid for our first page table. */
+	__get_cpu_var(current_asid) = min_asid;
+
+	/* Set up this thread as another owner of the init_mm */
+	atomic_inc(&init_mm.mm_count);
+	current->active_mm = &init_mm;
+	if (current->mm)
+		BUG();
+	enter_lazy_tlb(&init_mm, current);
+
+	/* Enable IRQs. */
+	init_per_tile_IRQs();
+
+	/* Allow hypervisor messages to be received */
+	init_messaging();
+	local_irq_enable();
+
+	/* Indicate that we're ready to come up. */
+	/* Must not do this before we're ready to receive messages */
+	if (cpumask_test_and_set_cpu(cpuid, &cpu_started)) {
+		printk(KERN_WARNING "CPU#%d already started!\n", cpuid);
+		for (;;)
+			local_irq_enable();
+	}
+
+	smp_nap();
+}
+
+void setup_mpls(void);  /* from kernel/setup.c */
+void store_permanent_mappings(void);
+
+/*
+ * Bring a secondary processor online.
+ */
+void __cpuinit online_secondary()
+{
+	/*
+	 * low-memory mappings have been cleared, flush them from
+	 * the local TLBs too.
+	 */
+	local_flush_tlb();
+
+	BUG_ON(in_interrupt());
+
+	/* This must be done before setting cpu_online_mask */
+	wmb();
+
+	/*
+	 * We need to hold call_lock, so there is no inconsistency
+	 * between the time smp_call_function() determines number of
+	 * IPI recipients, and the time when the determination is made
+	 * for which cpus receive the IPI. Holding this
+	 * lock helps us to not include this cpu in a currently in progress
+	 * smp_call_function().
+	 */
+	ipi_call_lock();
+	set_cpu_online(smp_processor_id(), 1);
+	ipi_call_unlock();
+	__get_cpu_var(cpu_state) = CPU_ONLINE;
+
+	/* Set up MPLs for this processor */
+	setup_mpls();
+
+
+	/* Set up tile-timer clock-event device on this cpu */
+	setup_tile_timer();
+
+	preempt_enable();
+
+	store_permanent_mappings();
+
+	cpu_idle();
+}
+
+int __cpuinit __cpu_up(unsigned int cpu)
+{
+	/* Wait 5s total for all CPUs for them to come online */
+	static int timeout;
+	for (; !cpumask_test_cpu(cpu, &cpu_started); timeout++) {
+		if (timeout >= 50000) {
+			printk(KERN_INFO "skipping unresponsive cpu%d\n", cpu);
+			local_irq_enable();
+			return -EIO;
+		}
+		udelay(100);
+	}
+
+	local_irq_enable();
+	per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
+
+	/* Unleash the CPU! */
+	send_IPI_single(cpu, MSG_TAG_START_CPU);
+	while (!cpumask_test_cpu(cpu, cpu_online_mask))
+		cpu_relax();
+	return 0;
+}
+
+static void panic_start_cpu(void)
+{
+	panic("Received a MSG_START_CPU IPI after boot finished.");
+}
+
+void __init smp_cpus_done(unsigned int max_cpus)
+{
+	int cpu, next, rc;
+
+	/* Reset the response to a (now illegal) MSG_START_CPU IPI. */
+	start_cpu_function_addr = (unsigned long) &panic_start_cpu;
+
+	cpumask_copy(&init_affinity, cpu_online_mask);
+
+	/*
+	 * Pin ourselves to a single cpu in the initial affinity set
+	 * so that kernel mappings for the rootfs are not in the dataplane,
+	 * if set, and to avoid unnecessary migrating during bringup.
+	 * Use the last cpu just in case the whole chip has been
+	 * isolated from the scheduler, to keep init away from likely
+	 * more useful user code.  This also ensures that work scheduled
+	 * via schedule_delayed_work() in the init routines will land
+	 * on this cpu.
+	 */
+	for (cpu = cpumask_first(&init_affinity);
+	     (next = cpumask_next(cpu, &init_affinity)) < nr_cpu_ids;
+	     cpu = next)
+		;
+	rc = sched_setaffinity(current->pid, cpumask_of(cpu));
+	if (rc != 0)
+		printk("Couldn't set init affinity to cpu %d (%d)\n", cpu, rc);
+}
diff --git a/arch/tile/kernel/stack.c b/arch/tile/kernel/stack.c
new file mode 100644
index 0000000..382170b
--- /dev/null
+++ b/arch/tile/kernel/stack.c
@@ -0,0 +1,485 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/kprobes.h>
+#include <linux/module.h>
+#include <linux/pfn.h>
+#include <linux/kallsyms.h>
+#include <linux/stacktrace.h>
+#include <linux/uaccess.h>
+#include <linux/mmzone.h>
+#include <asm/backtrace.h>
+#include <asm/page.h>
+#include <asm/tlbflush.h>
+#include <asm/ucontext.h>
+#include <asm/sigframe.h>
+#include <asm/stack.h>
+#include <arch/abi.h>
+#include <arch/interrupts.h>
+
+
+/* Is address on the specified kernel stack? */
+static int in_kernel_stack(struct KBacktraceIterator *kbt, VirtualAddress sp)
+{
+	ulong kstack_base = (ulong) kbt->task->stack;
+	if (kstack_base == 0)  /* corrupt task pointer; just follow stack... */
+		return sp >= PAGE_OFFSET && sp < (unsigned long)high_memory;
+	return sp >= kstack_base && sp < kstack_base + THREAD_SIZE;
+}
+
+/* Is address in the specified kernel code? */
+static int in_kernel_text(VirtualAddress address)
+{
+	return (address >= MEM_SV_INTRPT &&
+		address < MEM_SV_INTRPT + HPAGE_SIZE);
+}
+
+/* Is address valid for reading? */
+static int valid_address(struct KBacktraceIterator *kbt, VirtualAddress address)
+{
+	HV_PTE *l1_pgtable = kbt->pgtable;
+	HV_PTE *l2_pgtable;
+	unsigned long pfn;
+	HV_PTE pte;
+	struct page *page;
+
+	pte = l1_pgtable[HV_L1_INDEX(address)];
+	if (!hv_pte_get_present(pte))
+		return 0;
+	pfn = hv_pte_get_pfn(pte);
+	if (pte_huge(pte)) {
+		if (!pfn_valid(pfn)) {
+			printk(KERN_ERR "huge page has bad pfn %#lx\n", pfn);
+			return 0;
+		}
+		return hv_pte_get_present(pte) && hv_pte_get_readable(pte);
+	}
+
+	page = pfn_to_page(pfn);
+	if (PageHighMem(page)) {
+		printk(KERN_ERR "L2 page table not in LOWMEM (%#llx)\n",
+		       HV_PFN_TO_CPA(pfn));
+		return 0;
+	}
+	l2_pgtable = (HV_PTE *)pfn_to_kaddr(pfn);
+	pte = l2_pgtable[HV_L2_INDEX(address)];
+	return hv_pte_get_present(pte) && hv_pte_get_readable(pte);
+}
+
+/* Callback for backtracer; basically a glorified memcpy */
+static bool read_memory_func(void *result, VirtualAddress address,
+			     unsigned int size, void *vkbt)
+{
+	int retval;
+	struct KBacktraceIterator *kbt = (struct KBacktraceIterator *)vkbt;
+	if (in_kernel_text(address)) {
+		/* OK to read kernel code. */
+	} else if (address >= PAGE_OFFSET) {
+		/* We only tolerate kernel-space reads of this task's stack */
+		if (!in_kernel_stack(kbt, address))
+			return 0;
+	} else if (kbt->pgtable == NULL) {
+		return 0;	/* can't read user space in other tasks */
+	} else if (!valid_address(kbt, address)) {
+		return 0;	/* invalid user-space address */
+	}
+	pagefault_disable();
+	retval = __copy_from_user_inatomic(result, (const void *)address,
+					   size);
+	pagefault_enable();
+	return (retval == 0);
+}
+
+/* Return a pt_regs pointer for a valid fault handler frame */
+static struct pt_regs *valid_fault_handler(struct KBacktraceIterator* kbt)
+{
+#ifndef __tilegx__
+	const char *fault = NULL;  /* happy compiler */
+	char fault_buf[64];
+	VirtualAddress sp = kbt->it.sp;
+	struct pt_regs *p;
+
+	if (!in_kernel_stack(kbt, sp))
+		return NULL;
+	if (!in_kernel_stack(kbt, sp + C_ABI_SAVE_AREA_SIZE + PTREGS_SIZE-1))
+		return NULL;
+	p = (struct pt_regs *)(sp + C_ABI_SAVE_AREA_SIZE);
+	if (p->faultnum == INT_SWINT_1 || p->faultnum == INT_SWINT_1_SIGRETURN)
+		fault = "syscall";
+	else {
+		if (kbt->verbose) {     /* else we aren't going to use it */
+			snprintf(fault_buf, sizeof(fault_buf),
+				 "interrupt %ld", p->faultnum);
+			fault = fault_buf;
+		}
+	}
+	if (EX1_PL(p->ex1) == KERNEL_PL &&
+	    in_kernel_text(p->pc) &&
+	    in_kernel_stack(kbt, p->sp) &&
+	    p->sp >= sp) {
+		if (kbt->verbose)
+			printk(KERN_ERR "  <%s while in kernel mode>\n", fault);
+	} else if (EX1_PL(p->ex1) == USER_PL &&
+	    p->pc < PAGE_OFFSET &&
+	    p->sp < PAGE_OFFSET) {
+		if (kbt->verbose)
+			printk(KERN_ERR "  <%s while in user mode>\n", fault);
+	} else if (kbt->verbose) {
+		printk(KERN_ERR "  (odd fault: pc %#lx, sp %#lx, ex1 %#lx?)\n",
+		       p->pc, p->sp, p->ex1);
+		p = NULL;
+	}
+	if (!kbt->profile || (INT_MASK(p->faultnum) & QUEUED_INTERRUPTS) == 0)
+		return p;
+#endif
+	return NULL;
+}
+
+/* Is the pc pointing to a sigreturn trampoline? */
+static int is_sigreturn(VirtualAddress pc)
+{
+	return (pc == VDSO_BASE);
+}
+
+/* Return a pt_regs pointer for a valid signal handler frame */
+static struct pt_regs *valid_sigframe(struct KBacktraceIterator* kbt)
+{
+	BacktraceIterator *b = &kbt->it;
+
+	if (b->pc == VDSO_BASE) {
+		struct rt_sigframe *frame;
+		unsigned long sigframe_top =
+			b->sp + sizeof(struct rt_sigframe) - 1;
+		if (!valid_address(kbt, b->sp) ||
+		    !valid_address(kbt, sigframe_top)) {
+			if (kbt->verbose)
+				printk("  (odd signal: sp %#lx?)\n",
+				       (unsigned long)(b->sp));
+			return NULL;
+		}
+		frame = (struct rt_sigframe *)b->sp;
+		if (kbt->verbose) {
+			printk(KERN_ERR "  <received signal %d>\n",
+			       frame->info.si_signo);
+		}
+		return &frame->uc.uc_mcontext.regs;
+	}
+	return NULL;
+}
+
+int KBacktraceIterator_is_sigreturn(struct KBacktraceIterator *kbt)
+{
+	return is_sigreturn(kbt->it.pc);
+}
+
+static int KBacktraceIterator_restart(struct KBacktraceIterator *kbt)
+{
+	struct pt_regs *p;
+
+	p = valid_fault_handler(kbt);
+	if (p == NULL)
+		p = valid_sigframe(kbt);
+	if (p == NULL)
+		return 0;
+	backtrace_init(&kbt->it, read_memory_func, kbt,
+		       p->pc, p->lr, p->sp, p->regs[52]);
+	kbt->new_context = 1;
+	return 1;
+}
+
+/* Find a frame that isn't a sigreturn, if there is one. */
+static int KBacktraceIterator_next_item_inclusive(
+	struct KBacktraceIterator *kbt)
+{
+	for (;;) {
+		do {
+			if (!KBacktraceIterator_is_sigreturn(kbt))
+				return 1;
+		} while (backtrace_next(&kbt->it));
+
+		if (!KBacktraceIterator_restart(kbt))
+			return 0;
+	}
+}
+
+/*
+ * If the current sp is on a page different than what we recorded
+ * as the top-of-kernel-stack last time we context switched, we have
+ * probably blown the stack, and nothing is going to work out well.
+ * If we can at least get out a warning, that may help the debug,
+ * though we probably won't be able to backtrace into the code that
+ * actually did the recursive damage.
+ */
+static void validate_stack(struct pt_regs *regs)
+{
+	int cpu = smp_processor_id();
+	unsigned long ksp0 = get_current_ksp0();
+	unsigned long ksp0_base = ksp0 - THREAD_SIZE;
+	unsigned long sp = stack_pointer;
+
+	if (EX1_PL(regs->ex1) == KERNEL_PL && regs->sp >= ksp0) {
+		printk("WARNING: cpu %d: kernel stack page %#lx underrun!\n"
+		       "  sp %#lx (%#lx in caller), caller pc %#lx, lr %#lx\n",
+		       cpu, ksp0_base, sp, regs->sp, regs->pc, regs->lr);
+	}
+
+	else if (sp < ksp0_base + sizeof(struct thread_info)) {
+		printk("WARNING: cpu %d: kernel stack page %#lx overrun!\n"
+		       "  sp %#lx (%#lx in caller), caller pc %#lx, lr %#lx\n",
+		       cpu, ksp0_base, sp, regs->sp, regs->pc, regs->lr);
+	}
+}
+
+void KBacktraceIterator_init(struct KBacktraceIterator *kbt,
+			     struct task_struct *t, struct pt_regs *regs)
+{
+	VirtualAddress pc, lr, sp, r52;
+	int is_current;
+
+	/*
+	 * Set up callback information.  We grab the kernel stack base
+	 * so we will allow reads of that address range, and if we're
+	 * asking about the current process we grab the page table
+	 * so we can check user accesses before trying to read them.
+	 * We flush the TLB to avoid any weird skew issues.
+	 */
+	is_current = (t == NULL);
+	kbt->is_current = is_current;
+	if (is_current)
+		t = validate_current();
+	kbt->task = t;
+	kbt->pgtable = NULL;
+	kbt->verbose = 0;   /* override in caller if desired */
+	kbt->profile = 0;   /* override in caller if desired */
+	kbt->end = 0;
+	kbt->new_context = 0;
+	if (is_current) {
+		HV_PhysAddr pgdir_pa = hv_inquire_context().page_table;
+		if (pgdir_pa == (unsigned long)swapper_pg_dir - PAGE_OFFSET) {
+			/*
+			 * Not just an optimization: this also allows
+			 * this to work at all before va/pa mappings
+			 * are set up.
+			 */
+			kbt->pgtable = swapper_pg_dir;
+		} else {
+			struct page *page = pfn_to_page(PFN_DOWN(pgdir_pa));
+			if (!PageHighMem(page))
+				kbt->pgtable = __va(pgdir_pa);
+			else
+				printk(KERN_ERR "page table not in LOWMEM"
+				       " (%#llx)\n", pgdir_pa);
+		}
+		local_flush_tlb_all();
+		validate_stack(regs);
+	}
+
+	if (regs == NULL) {
+		extern const void *get_switch_to_pc(void);
+		if (is_current || t->state == TASK_RUNNING) {
+			/* Can't do this; we need registers */
+			kbt->end = 1;
+			return;
+		}
+		pc = (ulong) get_switch_to_pc();
+		lr = t->thread.pc;
+		sp = t->thread.ksp;
+		r52 = 0;
+	} else {
+		pc = regs->pc;
+		lr = regs->lr;
+		sp = regs->sp;
+		r52 = regs->regs[52];
+	}
+
+	backtrace_init(&kbt->it, read_memory_func, kbt, pc, lr, sp, r52);
+	kbt->end = !KBacktraceIterator_next_item_inclusive(kbt);
+}
+EXPORT_SYMBOL(KBacktraceIterator_init);
+
+int KBacktraceIterator_end(struct KBacktraceIterator *kbt)
+{
+	return kbt->end;
+}
+EXPORT_SYMBOL(KBacktraceIterator_end);
+
+void KBacktraceIterator_next(struct KBacktraceIterator *kbt)
+{
+	kbt->new_context = 0;
+	if (!backtrace_next(&kbt->it) &&
+	    !KBacktraceIterator_restart(kbt)) {
+			kbt->end = 1;
+			return;
+		}
+
+	kbt->end = !KBacktraceIterator_next_item_inclusive(kbt);
+}
+EXPORT_SYMBOL(KBacktraceIterator_next);
+
+/*
+ * This method wraps the backtracer's more generic support.
+ * It is only invoked from the architecture-specific code; show_stack()
+ * and dump_stack() (in entry.S) are architecture-independent entry points.
+ */
+void tile_show_stack(struct KBacktraceIterator *kbt, int headers)
+{
+	int i;
+
+	if (headers) {
+		/*
+		 * Add a blank line since if we are called from panic(),
+		 * then bust_spinlocks() spit out a space in front of us
+		 * and it will mess up our KERN_ERR.
+		 */
+		printk("\n");
+		printk(KERN_ERR "Starting stack dump of tid %d, pid %d (%s)"
+		       " on cpu %d at cycle %lld\n",
+		       kbt->task->pid, kbt->task->tgid, kbt->task->comm,
+		       smp_processor_id(), get_cycles());
+	}
+#ifdef __tilegx__
+	if (kbt->is_current) {
+		__insn_mtspr(SPR_SIM_CONTROL,
+			     SIM_DUMP_SPR_ARG(SIM_DUMP_BACKTRACE));
+	}
+#endif
+	kbt->verbose = 1;
+	i = 0;
+	for (; !KBacktraceIterator_end(kbt); KBacktraceIterator_next(kbt)) {
+		char *modname;
+		const char *name;
+		unsigned long address = kbt->it.pc;
+		unsigned long offset, size;
+		char namebuf[KSYM_NAME_LEN+100];
+
+		if (address >= PAGE_OFFSET)
+			name = kallsyms_lookup(address, &size, &offset,
+					       &modname, namebuf);
+		else
+			name = NULL;
+
+		if (!name)
+			namebuf[0] = '\0';
+		else {
+			size_t namelen = strlen(namebuf);
+			size_t remaining = (sizeof(namebuf) - 1) - namelen;
+			char *p = namebuf + namelen;
+			int rc = snprintf(p, remaining, "+%#lx/%#lx ",
+					  offset, size);
+			if (modname && rc < remaining)
+				snprintf(p + rc, remaining - rc,
+					 "[%s] ", modname);
+			namebuf[sizeof(namebuf)-1] = '\0';
+		}
+
+		printk(KERN_ERR "  frame %d: 0x%lx %s(sp 0x%lx)\n",
+		       i++, address, namebuf, (unsigned long)(kbt->it.sp));
+
+		if (i >= 100) {
+			printk(KERN_ERR "Stack dump truncated"
+			       " (%d frames)\n", i);
+			break;
+		}
+	}
+	if (headers)
+		printk(KERN_ERR "Stack dump complete\n");
+}
+EXPORT_SYMBOL(tile_show_stack);
+
+
+/* This is called from show_regs() and _dump_stack() */
+void dump_stack_regs(struct pt_regs *regs)
+{
+	struct KBacktraceIterator kbt;
+	KBacktraceIterator_init(&kbt, NULL, regs);
+	tile_show_stack(&kbt, 1);
+}
+EXPORT_SYMBOL(dump_stack_regs);
+
+static struct pt_regs *regs_to_pt_regs(struct pt_regs *regs,
+				       ulong pc, ulong lr, ulong sp, ulong r52)
+{
+	memset(regs, 0, sizeof(struct pt_regs));
+	regs->pc = pc;
+	regs->lr = lr;
+	regs->sp = sp;
+	regs->regs[52] = r52;
+	return regs;
+}
+
+/* This is called from dump_stack() and just converts to pt_regs */
+void _dump_stack(int dummy, ulong pc, ulong lr, ulong sp, ulong r52)
+{
+	struct pt_regs regs;
+	dump_stack_regs(regs_to_pt_regs(&regs, pc, lr, sp, r52));
+}
+
+/* This is called from KBacktraceIterator_init_current() */
+void _KBacktraceIterator_init_current(struct KBacktraceIterator *kbt, ulong pc,
+				      ulong lr, ulong sp, ulong r52)
+{
+	struct pt_regs regs;
+	KBacktraceIterator_init(kbt, NULL,
+				regs_to_pt_regs(&regs, pc, lr, sp, r52));
+}
+
+/* This is called only from kernel/sched.c, with esp == NULL */
+void show_stack(struct task_struct *task, unsigned long *esp)
+{
+	struct KBacktraceIterator kbt;
+	if (task == NULL || task == current)
+		KBacktraceIterator_init_current(&kbt);
+	else
+		KBacktraceIterator_init(&kbt, task, NULL);
+	tile_show_stack(&kbt, 0);
+}
+
+#ifdef CONFIG_STACKTRACE
+
+/* Support generic Linux stack API too */
+
+void save_stack_trace_tsk(struct task_struct *task, struct stack_trace *trace)
+{
+	struct KBacktraceIterator kbt;
+	int skip = trace->skip;
+	int i = 0;
+
+	if (task == NULL || task == current)
+		KBacktraceIterator_init_current(&kbt);
+	else
+		KBacktraceIterator_init(&kbt, task, NULL);
+	for (; !KBacktraceIterator_end(&kbt); KBacktraceIterator_next(&kbt)) {
+		if (skip) {
+			--skip;
+			continue;
+		}
+		if (i >= trace->max_entries || kbt.it.pc < PAGE_OFFSET)
+			break;
+		trace->entries[i++] = kbt.it.pc;
+	}
+	trace->nr_entries = i;
+}
+EXPORT_SYMBOL(save_stack_trace_tsk);
+
+void save_stack_trace(struct stack_trace *trace)
+{
+	save_stack_trace_tsk(NULL, trace);
+}
+
+#endif
+
+/* In entry.S */
+EXPORT_SYMBOL(KBacktraceIterator_init_current);
diff --git a/arch/tile/kernel/sys.c b/arch/tile/kernel/sys.c
new file mode 100644
index 0000000..a3d982b
--- /dev/null
+++ b/arch/tile/kernel/sys.c
@@ -0,0 +1,122 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ * This file contains various random system calls that
+ * have a non-standard calling sequence on the Linux/TILE
+ * platform.
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/syscalls.h>
+#include <linux/mman.h>
+#include <linux/file.h>
+#include <linux/mempolicy.h>
+#include <linux/binfmts.h>
+#include <linux/fs.h>
+#include <linux/syscalls.h>
+#include <linux/uaccess.h>
+#include <linux/signal.h>
+#include <asm/syscalls.h>
+
+#include <asm/pgtable.h>
+#include <asm/homecache.h>
+#include <arch/chip.h>
+
+SYSCALL_DEFINE0(flush_cache)
+{
+	homecache_evict(cpumask_of(smp_processor_id()));
+	return 0;
+}
+
+/*
+ * Syscalls that pass 64-bit values on 32-bit systems normally
+ * pass them as (low,high) word packed into the immediately adjacent
+ * registers.  If the low word naturally falls on an even register,
+ * our ABI makes it work correctly; if not, we adjust it here.
+ * Handling it here means we don't have to fix uclibc AND glibc AND
+ * any other standard libcs we want to support.
+ */
+
+#if !defined(__tilegx__) || defined(CONFIG_COMPAT)
+
+ssize_t sys32_readahead(int fd, u32 offset_lo, u32 offset_hi, u32 count)
+{
+	return sys_readahead(fd, ((loff_t)offset_hi << 32) | offset_lo, count);
+}
+
+long sys32_fadvise64(int fd, u32 offset_lo, u32 offset_hi,
+		     u32 len, int advice)
+{
+	return sys_fadvise64_64(fd, ((loff_t)offset_hi << 32) | offset_lo,
+				len, advice);
+}
+
+int sys32_fadvise64_64(int fd, u32 offset_lo, u32 offset_hi,
+		       u32 len_lo, u32 len_hi, int advice)
+{
+	return sys_fadvise64_64(fd, ((loff_t)offset_hi << 32) | offset_lo,
+				((loff_t)len_hi << 32) | len_lo, advice);
+}
+
+#endif /* 32-bit syscall wrappers */
+
+/*
+ * This API uses a 4KB-page-count offset into the file descriptor.
+ * It is likely not the right API to use on a 64-bit platform.
+ */
+SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len,
+		unsigned long, prot, unsigned long, flags,
+		unsigned long, fd, unsigned long, off_4k)
+{
+#define PAGE_ADJUST (PAGE_SHIFT - 12)
+	if (off_4k & ((1 << PAGE_ADJUST) - 1))
+		return -EINVAL;
+	return sys_mmap_pgoff(addr, len, prot, flags, fd,
+			      off_4k >> PAGE_ADJUST);
+}
+
+/*
+ * This API uses a byte offset into the file descriptor.
+ * It is likely not the right API to use on a 32-bit platform.
+ */
+SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
+		unsigned long, prot, unsigned long, flags,
+		unsigned long, fd, unsigned long, offset)
+{
+	if (offset & ((1 << PAGE_SHIFT) - 1))
+		return -EINVAL;
+	return sys_mmap_pgoff(addr, len, prot, flags, fd,
+			      offset >> PAGE_SHIFT);
+}
+
+
+/* Provide the actual syscall number to call mapping. */
+#undef __SYSCALL
+#define __SYSCALL(nr, call) [nr] = (call),
+
+#ifndef __tilegx__
+/* See comments at the top of the file. */
+#define sys_fadvise64 sys32_fadvise64
+#define sys_fadvise64_64 sys32_fadvise64_64
+#define sys_readahead sys32_readahead
+#define sys_sync_file_range sys_sync_file_range2
+#endif
+
+void *sys_call_table[__NR_syscalls] = {
+	[0 ... __NR_syscalls-1] = sys_ni_syscall,
+#include <asm/unistd.h>
+};
diff --git a/arch/tile/kernel/tile-desc_32.c b/arch/tile/kernel/tile-desc_32.c
new file mode 100644
index 0000000..3b78369
--- /dev/null
+++ b/arch/tile/kernel/tile-desc_32.c
@@ -0,0 +1,13826 @@
+/* Define to include "bfd.h" and get actual BFD relocations below. */
+/* #define WANT_BFD_RELOCS */
+
+#ifdef WANT_BFD_RELOCS
+#include "bfd.h"
+#define MAYBE_BFD_RELOC(X) (X)
+#else
+#define MAYBE_BFD_RELOC(X) -1
+#endif
+
+/* Special registers. */
+#define TREG_LR 55
+#define TREG_SN 56
+#define TREG_ZERO 63
+
+/* FIXME: Rename this. */
+#include <asm/opcode-tile.h>
+
+
+const struct tile_opcode tile_opcodes[394] =
+{
+ { "bpt", TILE_OPC_BPT, 0x2 /* pipes */, 0 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    0, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      {  },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfbffffff80000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x400b3cae00000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "info", TILE_OPC_INFO, 0xf /* pipes */, 1 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0 },
+      { 1 },
+      { 2 },
+      { 3 },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ff00fffULL,
+      0xfff807ff80000000ULL,
+      0x8000000078000fffULL,
+      0xf80007ff80000000ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000050100fffULL,
+      0x302007ff80000000ULL,
+      0x8000000050000fffULL,
+      0xc00007ff80000000ULL,
+      -1ULL
+    }
+  },
+  { "infol", TILE_OPC_INFOL, 0x3 /* pipes */, 1 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 4 },
+      { 5 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x8000000070000fffULL,
+      0xf80007ff80000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000030000fffULL,
+      0x200007ff80000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "j", TILE_OPC_J, 0x2 /* pipes */, 1 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 6 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xf000000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x5000000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "jal", TILE_OPC_JAL, 0x2 /* pipes */, 1 /* num_operands */,
+    TREG_LR, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 6 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xf000000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x6000000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "move", TILE_OPC_MOVE, 0xf /* pipes */, 2 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8 },
+      { 9, 10 },
+      { 11, 12 },
+      { 13, 14 },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffff000ULL,
+      0xfffff80000000000ULL,
+      0x80000000780ff000ULL,
+      0xf807f80000000000ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000000cff000ULL,
+      0x0833f80000000000ULL,
+      0x80000000180bf000ULL,
+      0x9805f80000000000ULL,
+      -1ULL
+    }
+  },
+  { "move.sn", TILE_OPC_MOVE_SN, 0x3 /* pipes */, 2 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8 },
+      { 9, 10 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffff000ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000008cff000ULL,
+      0x0c33f80000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "movei", TILE_OPC_MOVEI, 0xf /* pipes */, 2 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 0 },
+      { 9, 1 },
+      { 11, 2 },
+      { 13, 3 },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ff00fc0ULL,
+      0xfff807e000000000ULL,
+      0x8000000078000fc0ULL,
+      0xf80007e000000000ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000040800fc0ULL,
+      0x305807e000000000ULL,
+      0x8000000058000fc0ULL,
+      0xc80007e000000000ULL,
+      -1ULL
+    }
+  },
+  { "movei.sn", TILE_OPC_MOVEI_SN, 0x3 /* pipes */, 2 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 0 },
+      { 9, 1 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ff00fc0ULL,
+      0xfff807e000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000048800fc0ULL,
+      0x345807e000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "moveli", TILE_OPC_MOVELI, 0x3 /* pipes */, 2 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 4 },
+      { 9, 5 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x8000000070000fc0ULL,
+      0xf80007e000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000020000fc0ULL,
+      0x180007e000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "moveli.sn", TILE_OPC_MOVELI_SN, 0x3 /* pipes */, 2 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 4 },
+      { 9, 5 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x8000000070000fc0ULL,
+      0xf80007e000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000010000fc0ULL,
+      0x100007e000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "movelis", TILE_OPC_MOVELIS, 0x3 /* pipes */, 2 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 4 },
+      { 9, 5 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x8000000070000fc0ULL,
+      0xf80007e000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000010000fc0ULL,
+      0x100007e000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "prefetch", TILE_OPC_PREFETCH, 0x12 /* pipes */, 1 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 10 },
+      { 0, },
+      { 0, },
+      { 15 }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfffff81f80000000ULL,
+      0ULL,
+      0ULL,
+      0x8700000003f00000ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x400b501f80000000ULL,
+      -1ULL,
+      -1ULL,
+      0x8000000003f00000ULL
+    }
+  },
+  { "add", TILE_OPC_ADD, 0xf /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 11, 12, 18 },
+      { 13, 14, 19 },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x80000000780c0000ULL,
+      0xf806000000000000ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x00000000000c0000ULL,
+      0x0806000000000000ULL,
+      0x8000000008000000ULL,
+      0x8800000000000000ULL,
+      -1ULL
+    }
+  },
+  { "add.sn", TILE_OPC_ADD_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x00000000080c0000ULL,
+      0x0c06000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "addb", TILE_OPC_ADDB, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000000040000ULL,
+      0x0802000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "addb.sn", TILE_OPC_ADDB_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000008040000ULL,
+      0x0c02000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "addbs_u", TILE_OPC_ADDBS_U, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000001880000ULL,
+      0x0888000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "addbs_u.sn", TILE_OPC_ADDBS_U_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000009880000ULL,
+      0x0c88000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "addh", TILE_OPC_ADDH, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000000080000ULL,
+      0x0804000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "addh.sn", TILE_OPC_ADDH_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000008080000ULL,
+      0x0c04000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "addhs", TILE_OPC_ADDHS, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x00000000018c0000ULL,
+      0x088a000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "addhs.sn", TILE_OPC_ADDHS_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x00000000098c0000ULL,
+      0x0c8a000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "addi", TILE_OPC_ADDI, 0xf /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 0 },
+      { 9, 10, 1 },
+      { 11, 12, 2 },
+      { 13, 14, 3 },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0x8000000078000000ULL,
+      0xf800000000000000ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000040300000ULL,
+      0x3018000000000000ULL,
+      0x8000000048000000ULL,
+      0xb800000000000000ULL,
+      -1ULL
+    }
+  },
+  { "addi.sn", TILE_OPC_ADDI_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 0 },
+      { 9, 10, 1 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000048300000ULL,
+      0x3418000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "addib", TILE_OPC_ADDIB, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 0 },
+      { 9, 10, 1 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000040100000ULL,
+      0x3008000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "addib.sn", TILE_OPC_ADDIB_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 0 },
+      { 9, 10, 1 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000048100000ULL,
+      0x3408000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "addih", TILE_OPC_ADDIH, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 0 },
+      { 9, 10, 1 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000040200000ULL,
+      0x3010000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "addih.sn", TILE_OPC_ADDIH_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 0 },
+      { 9, 10, 1 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000048200000ULL,
+      0x3410000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "addli", TILE_OPC_ADDLI, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 4 },
+      { 9, 10, 5 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x8000000070000000ULL,
+      0xf800000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000020000000ULL,
+      0x1800000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "addli.sn", TILE_OPC_ADDLI_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 4 },
+      { 9, 10, 5 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x8000000070000000ULL,
+      0xf800000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000010000000ULL,
+      0x1000000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "addlis", TILE_OPC_ADDLIS, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 4 },
+      { 9, 10, 5 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x8000000070000000ULL,
+      0xf800000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000010000000ULL,
+      0x1000000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "adds", TILE_OPC_ADDS, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000001800000ULL,
+      0x0884000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "adds.sn", TILE_OPC_ADDS_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000009800000ULL,
+      0x0c84000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "adiffb_u", TILE_OPC_ADIFFB_U, 0x1 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000000100000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "adiffb_u.sn", TILE_OPC_ADIFFB_U_SN, 0x1 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000008100000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "adiffh", TILE_OPC_ADIFFH, 0x1 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000000140000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "adiffh.sn", TILE_OPC_ADIFFH_SN, 0x1 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000008140000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "and", TILE_OPC_AND, 0xf /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 11, 12, 18 },
+      { 13, 14, 19 },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x80000000780c0000ULL,
+      0xf806000000000000ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000000180000ULL,
+      0x0808000000000000ULL,
+      0x8000000018000000ULL,
+      0x9800000000000000ULL,
+      -1ULL
+    }
+  },
+  { "and.sn", TILE_OPC_AND_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000008180000ULL,
+      0x0c08000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "andi", TILE_OPC_ANDI, 0xf /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 0 },
+      { 9, 10, 1 },
+      { 11, 12, 2 },
+      { 13, 14, 3 },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0x8000000078000000ULL,
+      0xf800000000000000ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000050100000ULL,
+      0x3020000000000000ULL,
+      0x8000000050000000ULL,
+      0xc000000000000000ULL,
+      -1ULL
+    }
+  },
+  { "andi.sn", TILE_OPC_ANDI_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 0 },
+      { 9, 10, 1 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000058100000ULL,
+      0x3420000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "auli", TILE_OPC_AULI, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 4 },
+      { 9, 10, 5 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x8000000070000000ULL,
+      0xf800000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000030000000ULL,
+      0x2000000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "avgb_u", TILE_OPC_AVGB_U, 0x1 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x00000000001c0000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "avgb_u.sn", TILE_OPC_AVGB_U_SN, 0x1 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x00000000081c0000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "avgh", TILE_OPC_AVGH, 0x1 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000000200000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "avgh.sn", TILE_OPC_AVGH_SN, 0x1 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000008200000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "bbns", TILE_OPC_BBNS, 0x2 /* pipes */, 2 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 10, 20 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfc00000780000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x2800000700000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "bbns.sn", TILE_OPC_BBNS_SN, 0x2 /* pipes */, 2 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 10, 20 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfc00000780000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x2c00000700000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "bbnst", TILE_OPC_BBNST, 0x2 /* pipes */, 2 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 10, 20 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfc00000780000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x2800000780000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "bbnst.sn", TILE_OPC_BBNST_SN, 0x2 /* pipes */, 2 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 10, 20 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfc00000780000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x2c00000780000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "bbs", TILE_OPC_BBS, 0x2 /* pipes */, 2 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 10, 20 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfc00000780000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x2800000600000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "bbs.sn", TILE_OPC_BBS_SN, 0x2 /* pipes */, 2 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 10, 20 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfc00000780000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x2c00000600000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "bbst", TILE_OPC_BBST, 0x2 /* pipes */, 2 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 10, 20 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfc00000780000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x2800000680000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "bbst.sn", TILE_OPC_BBST_SN, 0x2 /* pipes */, 2 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 10, 20 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfc00000780000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x2c00000680000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "bgez", TILE_OPC_BGEZ, 0x2 /* pipes */, 2 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 10, 20 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfc00000780000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x2800000300000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "bgez.sn", TILE_OPC_BGEZ_SN, 0x2 /* pipes */, 2 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 10, 20 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfc00000780000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x2c00000300000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "bgezt", TILE_OPC_BGEZT, 0x2 /* pipes */, 2 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 10, 20 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfc00000780000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x2800000380000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "bgezt.sn", TILE_OPC_BGEZT_SN, 0x2 /* pipes */, 2 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 10, 20 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfc00000780000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x2c00000380000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "bgz", TILE_OPC_BGZ, 0x2 /* pipes */, 2 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 10, 20 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfc00000780000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x2800000200000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "bgz.sn", TILE_OPC_BGZ_SN, 0x2 /* pipes */, 2 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 10, 20 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfc00000780000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x2c00000200000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "bgzt", TILE_OPC_BGZT, 0x2 /* pipes */, 2 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 10, 20 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfc00000780000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x2800000280000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "bgzt.sn", TILE_OPC_BGZT_SN, 0x2 /* pipes */, 2 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 10, 20 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfc00000780000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x2c00000280000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "bitx", TILE_OPC_BITX, 0x5 /* pipes */, 2 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8 },
+      { 0, },
+      { 11, 12 },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffff000ULL,
+      0ULL,
+      0x80000000780ff000ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000070161000ULL,
+      -1ULL,
+      0x80000000680a1000ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "bitx.sn", TILE_OPC_BITX_SN, 0x1 /* pipes */, 2 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffff000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000078161000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "blez", TILE_OPC_BLEZ, 0x2 /* pipes */, 2 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 10, 20 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfc00000780000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x2800000500000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "blez.sn", TILE_OPC_BLEZ_SN, 0x2 /* pipes */, 2 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 10, 20 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfc00000780000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x2c00000500000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "blezt", TILE_OPC_BLEZT, 0x2 /* pipes */, 2 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 10, 20 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfc00000780000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x2800000580000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "blezt.sn", TILE_OPC_BLEZT_SN, 0x2 /* pipes */, 2 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 10, 20 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfc00000780000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x2c00000580000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "blz", TILE_OPC_BLZ, 0x2 /* pipes */, 2 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 10, 20 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfc00000780000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x2800000400000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "blz.sn", TILE_OPC_BLZ_SN, 0x2 /* pipes */, 2 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 10, 20 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfc00000780000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x2c00000400000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "blzt", TILE_OPC_BLZT, 0x2 /* pipes */, 2 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 10, 20 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfc00000780000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x2800000480000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "blzt.sn", TILE_OPC_BLZT_SN, 0x2 /* pipes */, 2 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 10, 20 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfc00000780000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x2c00000480000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "bnz", TILE_OPC_BNZ, 0x2 /* pipes */, 2 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 10, 20 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfc00000780000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x2800000100000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "bnz.sn", TILE_OPC_BNZ_SN, 0x2 /* pipes */, 2 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 10, 20 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfc00000780000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x2c00000100000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "bnzt", TILE_OPC_BNZT, 0x2 /* pipes */, 2 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 10, 20 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfc00000780000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x2800000180000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "bnzt.sn", TILE_OPC_BNZT_SN, 0x2 /* pipes */, 2 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 10, 20 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfc00000780000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x2c00000180000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "bytex", TILE_OPC_BYTEX, 0x5 /* pipes */, 2 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8 },
+      { 0, },
+      { 11, 12 },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffff000ULL,
+      0ULL,
+      0x80000000780ff000ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000070162000ULL,
+      -1ULL,
+      0x80000000680a2000ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "bytex.sn", TILE_OPC_BYTEX_SN, 0x1 /* pipes */, 2 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffff000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000078162000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "bz", TILE_OPC_BZ, 0x2 /* pipes */, 2 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 10, 20 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfc00000780000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x2800000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "bz.sn", TILE_OPC_BZ_SN, 0x2 /* pipes */, 2 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 10, 20 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfc00000780000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x2c00000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "bzt", TILE_OPC_BZT, 0x2 /* pipes */, 2 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 10, 20 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfc00000780000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x2800000080000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "bzt.sn", TILE_OPC_BZT_SN, 0x2 /* pipes */, 2 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 10, 20 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfc00000780000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x2c00000080000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "clz", TILE_OPC_CLZ, 0x5 /* pipes */, 2 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8 },
+      { 0, },
+      { 11, 12 },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffff000ULL,
+      0ULL,
+      0x80000000780ff000ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000070163000ULL,
+      -1ULL,
+      0x80000000680a3000ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "clz.sn", TILE_OPC_CLZ_SN, 0x1 /* pipes */, 2 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffff000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000078163000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "crc32_32", TILE_OPC_CRC32_32, 0x1 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000000240000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "crc32_32.sn", TILE_OPC_CRC32_32_SN, 0x1 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000008240000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "crc32_8", TILE_OPC_CRC32_8, 0x1 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000000280000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "crc32_8.sn", TILE_OPC_CRC32_8_SN, 0x1 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000008280000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "ctz", TILE_OPC_CTZ, 0x5 /* pipes */, 2 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8 },
+      { 0, },
+      { 11, 12 },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffff000ULL,
+      0ULL,
+      0x80000000780ff000ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000070164000ULL,
+      -1ULL,
+      0x80000000680a4000ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "ctz.sn", TILE_OPC_CTZ_SN, 0x1 /* pipes */, 2 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffff000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000078164000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "drain", TILE_OPC_DRAIN, 0x2 /* pipes */, 0 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    0, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      {  },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfbfff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x400b080000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "dtlbpr", TILE_OPC_DTLBPR, 0x2 /* pipes */, 1 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 10 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfbfff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x400b100000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "dword_align", TILE_OPC_DWORD_ALIGN, 0x1 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 21, 8, 16 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x00000000017c0000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "dword_align.sn", TILE_OPC_DWORD_ALIGN_SN, 0x1 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 21, 8, 16 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x00000000097c0000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "finv", TILE_OPC_FINV, 0x2 /* pipes */, 1 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 10 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfbfff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x400b180000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "flush", TILE_OPC_FLUSH, 0x2 /* pipes */, 1 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 10 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfbfff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x400b200000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "fnop", TILE_OPC_FNOP, 0xf /* pipes */, 0 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      {  },
+      {  },
+      {  },
+      {  },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x8000000077fff000ULL,
+      0xfbfff80000000000ULL,
+      0x80000000780ff000ULL,
+      0xf807f80000000000ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000070165000ULL,
+      0x400b280000000000ULL,
+      0x80000000680a5000ULL,
+      0xd805080000000000ULL,
+      -1ULL
+    }
+  },
+  { "icoh", TILE_OPC_ICOH, 0x2 /* pipes */, 1 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 10 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfbfff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x400b300000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "ill", TILE_OPC_ILL, 0xa /* pipes */, 0 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      {  },
+      { 0, },
+      {  },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfbfff80000000000ULL,
+      0ULL,
+      0xf807f80000000000ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x400b380000000000ULL,
+      -1ULL,
+      0xd805100000000000ULL,
+      -1ULL
+    }
+  },
+  { "inthb", TILE_OPC_INTHB, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x00000000002c0000ULL,
+      0x080a000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "inthb.sn", TILE_OPC_INTHB_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x00000000082c0000ULL,
+      0x0c0a000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "inthh", TILE_OPC_INTHH, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000000300000ULL,
+      0x080c000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "inthh.sn", TILE_OPC_INTHH_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000008300000ULL,
+      0x0c0c000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "intlb", TILE_OPC_INTLB, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000000340000ULL,
+      0x080e000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "intlb.sn", TILE_OPC_INTLB_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000008340000ULL,
+      0x0c0e000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "intlh", TILE_OPC_INTLH, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000000380000ULL,
+      0x0810000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "intlh.sn", TILE_OPC_INTLH_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000008380000ULL,
+      0x0c10000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "inv", TILE_OPC_INV, 0x2 /* pipes */, 1 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 10 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfbfff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x400b400000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "iret", TILE_OPC_IRET, 0x2 /* pipes */, 0 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      {  },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfbfff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x400b480000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "jalb", TILE_OPC_JALB, 0x2 /* pipes */, 1 /* num_operands */,
+    TREG_LR, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 22 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xf800000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x6800000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "jalf", TILE_OPC_JALF, 0x2 /* pipes */, 1 /* num_operands */,
+    TREG_LR, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 22 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xf800000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x6000000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "jalr", TILE_OPC_JALR, 0x2 /* pipes */, 1 /* num_operands */,
+    TREG_LR, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 10 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfbfe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x0814000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "jalrp", TILE_OPC_JALRP, 0x2 /* pipes */, 1 /* num_operands */,
+    TREG_LR, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 10 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfbfe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x0812000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "jb", TILE_OPC_JB, 0x2 /* pipes */, 1 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 22 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xf800000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x5800000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "jf", TILE_OPC_JF, 0x2 /* pipes */, 1 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 22 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xf800000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x5000000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "jr", TILE_OPC_JR, 0x2 /* pipes */, 1 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 10 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfbfe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x0818000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "jrp", TILE_OPC_JRP, 0x2 /* pipes */, 1 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 10 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfbfe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x0816000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "lb", TILE_OPC_LB, 0x12 /* pipes */, 2 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 9, 10 },
+      { 0, },
+      { 0, },
+      { 23, 15 }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0x8700000000000000ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x400b500000000000ULL,
+      -1ULL,
+      -1ULL,
+      0x8000000000000000ULL
+    }
+  },
+  { "lb.sn", TILE_OPC_LB_SN, 0x2 /* pipes */, 2 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 9, 10 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x440b500000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "lb_u", TILE_OPC_LB_U, 0x12 /* pipes */, 2 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 9, 10 },
+      { 0, },
+      { 0, },
+      { 23, 15 }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0x8700000000000000ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x400b580000000000ULL,
+      -1ULL,
+      -1ULL,
+      0x8100000000000000ULL
+    }
+  },
+  { "lb_u.sn", TILE_OPC_LB_U_SN, 0x2 /* pipes */, 2 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 9, 10 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x440b580000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "lbadd", TILE_OPC_LBADD, 0x2 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 9, 24, 1 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x30b0000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "lbadd.sn", TILE_OPC_LBADD_SN, 0x2 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 9, 24, 1 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x34b0000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "lbadd_u", TILE_OPC_LBADD_U, 0x2 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 9, 24, 1 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x30b8000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "lbadd_u.sn", TILE_OPC_LBADD_U_SN, 0x2 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 9, 24, 1 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x34b8000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "lh", TILE_OPC_LH, 0x12 /* pipes */, 2 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 9, 10 },
+      { 0, },
+      { 0, },
+      { 23, 15 }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0x8700000000000000ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x400b600000000000ULL,
+      -1ULL,
+      -1ULL,
+      0x8200000000000000ULL
+    }
+  },
+  { "lh.sn", TILE_OPC_LH_SN, 0x2 /* pipes */, 2 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 9, 10 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x440b600000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "lh_u", TILE_OPC_LH_U, 0x12 /* pipes */, 2 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 9, 10 },
+      { 0, },
+      { 0, },
+      { 23, 15 }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0x8700000000000000ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x400b680000000000ULL,
+      -1ULL,
+      -1ULL,
+      0x8300000000000000ULL
+    }
+  },
+  { "lh_u.sn", TILE_OPC_LH_U_SN, 0x2 /* pipes */, 2 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 9, 10 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x440b680000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "lhadd", TILE_OPC_LHADD, 0x2 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 9, 24, 1 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x30c0000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "lhadd.sn", TILE_OPC_LHADD_SN, 0x2 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 9, 24, 1 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x34c0000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "lhadd_u", TILE_OPC_LHADD_U, 0x2 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 9, 24, 1 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x30c8000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "lhadd_u.sn", TILE_OPC_LHADD_U_SN, 0x2 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 9, 24, 1 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x34c8000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "lnk", TILE_OPC_LNK, 0x2 /* pipes */, 1 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 9 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x081a000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "lnk.sn", TILE_OPC_LNK_SN, 0x2 /* pipes */, 1 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 9 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x0c1a000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "lw", TILE_OPC_LW, 0x12 /* pipes */, 2 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 9, 10 },
+      { 0, },
+      { 0, },
+      { 23, 15 }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0x8700000000000000ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x400b700000000000ULL,
+      -1ULL,
+      -1ULL,
+      0x8400000000000000ULL
+    }
+  },
+  { "lw.sn", TILE_OPC_LW_SN, 0x2 /* pipes */, 2 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 9, 10 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x440b700000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "lw_na", TILE_OPC_LW_NA, 0x2 /* pipes */, 2 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 9, 10 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x400bc00000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "lw_na.sn", TILE_OPC_LW_NA_SN, 0x2 /* pipes */, 2 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 9, 10 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x440bc00000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "lwadd", TILE_OPC_LWADD, 0x2 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 9, 24, 1 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x30d0000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "lwadd.sn", TILE_OPC_LWADD_SN, 0x2 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 9, 24, 1 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x34d0000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "lwadd_na", TILE_OPC_LWADD_NA, 0x2 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 9, 24, 1 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x30d8000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "lwadd_na.sn", TILE_OPC_LWADD_NA_SN, 0x2 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 9, 24, 1 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x34d8000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "maxb_u", TILE_OPC_MAXB_U, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x00000000003c0000ULL,
+      0x081c000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "maxb_u.sn", TILE_OPC_MAXB_U_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x00000000083c0000ULL,
+      0x0c1c000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "maxh", TILE_OPC_MAXH, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000000400000ULL,
+      0x081e000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "maxh.sn", TILE_OPC_MAXH_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000008400000ULL,
+      0x0c1e000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "maxib_u", TILE_OPC_MAXIB_U, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 0 },
+      { 9, 10, 1 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000040400000ULL,
+      0x3028000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "maxib_u.sn", TILE_OPC_MAXIB_U_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 0 },
+      { 9, 10, 1 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000048400000ULL,
+      0x3428000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "maxih", TILE_OPC_MAXIH, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 0 },
+      { 9, 10, 1 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000040500000ULL,
+      0x3030000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "maxih.sn", TILE_OPC_MAXIH_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 0 },
+      { 9, 10, 1 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000048500000ULL,
+      0x3430000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "mf", TILE_OPC_MF, 0x2 /* pipes */, 0 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      {  },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfbfff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x400b780000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "mfspr", TILE_OPC_MFSPR, 0x2 /* pipes */, 2 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 9, 25 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfbf8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x3038000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "minb_u", TILE_OPC_MINB_U, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000000440000ULL,
+      0x0820000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "minb_u.sn", TILE_OPC_MINB_U_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000008440000ULL,
+      0x0c20000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "minh", TILE_OPC_MINH, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000000480000ULL,
+      0x0822000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "minh.sn", TILE_OPC_MINH_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000008480000ULL,
+      0x0c22000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "minib_u", TILE_OPC_MINIB_U, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 0 },
+      { 9, 10, 1 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000040600000ULL,
+      0x3040000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "minib_u.sn", TILE_OPC_MINIB_U_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 0 },
+      { 9, 10, 1 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000048600000ULL,
+      0x3440000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "minih", TILE_OPC_MINIH, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 0 },
+      { 9, 10, 1 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000040700000ULL,
+      0x3048000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "minih.sn", TILE_OPC_MINIH_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 0 },
+      { 9, 10, 1 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000048700000ULL,
+      0x3448000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "mm", TILE_OPC_MM, 0x3 /* pipes */, 5 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16, 26, 27 },
+      { 9, 10, 17, 28, 29 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x8000000070000000ULL,
+      0xf800000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000060000000ULL,
+      0x3800000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "mnz", TILE_OPC_MNZ, 0xf /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 11, 12, 18 },
+      { 13, 14, 19 },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x80000000780c0000ULL,
+      0xf806000000000000ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000000540000ULL,
+      0x0828000000000000ULL,
+      0x8000000010000000ULL,
+      0x9002000000000000ULL,
+      -1ULL
+    }
+  },
+  { "mnz.sn", TILE_OPC_MNZ_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000008540000ULL,
+      0x0c28000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "mnzb", TILE_OPC_MNZB, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x00000000004c0000ULL,
+      0x0824000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "mnzb.sn", TILE_OPC_MNZB_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x00000000084c0000ULL,
+      0x0c24000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "mnzh", TILE_OPC_MNZH, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000000500000ULL,
+      0x0826000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "mnzh.sn", TILE_OPC_MNZH_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000008500000ULL,
+      0x0c26000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "mtspr", TILE_OPC_MTSPR, 0x2 /* pipes */, 2 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 30, 10 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfbf8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x3050000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "mulhh_ss", TILE_OPC_MULHH_SS, 0x5 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 0, },
+      { 11, 12, 18 },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0x80000000780c0000ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000000680000ULL,
+      -1ULL,
+      0x8000000038000000ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "mulhh_ss.sn", TILE_OPC_MULHH_SS_SN, 0x1 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000008680000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "mulhh_su", TILE_OPC_MULHH_SU, 0x1 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x00000000006c0000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "mulhh_su.sn", TILE_OPC_MULHH_SU_SN, 0x1 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x00000000086c0000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "mulhh_uu", TILE_OPC_MULHH_UU, 0x5 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 0, },
+      { 11, 12, 18 },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0x80000000780c0000ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000000700000ULL,
+      -1ULL,
+      0x8000000038040000ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "mulhh_uu.sn", TILE_OPC_MULHH_UU_SN, 0x1 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000008700000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "mulhha_ss", TILE_OPC_MULHHA_SS, 0x5 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 21, 8, 16 },
+      { 0, },
+      { 31, 12, 18 },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0x80000000780c0000ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000000580000ULL,
+      -1ULL,
+      0x8000000040000000ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "mulhha_ss.sn", TILE_OPC_MULHHA_SS_SN, 0x1 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 21, 8, 16 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000008580000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "mulhha_su", TILE_OPC_MULHHA_SU, 0x1 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 21, 8, 16 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x00000000005c0000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "mulhha_su.sn", TILE_OPC_MULHHA_SU_SN, 0x1 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 21, 8, 16 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x00000000085c0000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "mulhha_uu", TILE_OPC_MULHHA_UU, 0x5 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 21, 8, 16 },
+      { 0, },
+      { 31, 12, 18 },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0x80000000780c0000ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000000600000ULL,
+      -1ULL,
+      0x8000000040040000ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "mulhha_uu.sn", TILE_OPC_MULHHA_UU_SN, 0x1 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 21, 8, 16 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000008600000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "mulhhsa_uu", TILE_OPC_MULHHSA_UU, 0x1 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 21, 8, 16 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000000640000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "mulhhsa_uu.sn", TILE_OPC_MULHHSA_UU_SN, 0x1 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 21, 8, 16 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000008640000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "mulhl_ss", TILE_OPC_MULHL_SS, 0x1 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000000880000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "mulhl_ss.sn", TILE_OPC_MULHL_SS_SN, 0x1 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000008880000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "mulhl_su", TILE_OPC_MULHL_SU, 0x1 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x00000000008c0000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "mulhl_su.sn", TILE_OPC_MULHL_SU_SN, 0x1 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x00000000088c0000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "mulhl_us", TILE_OPC_MULHL_US, 0x1 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000000900000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "mulhl_us.sn", TILE_OPC_MULHL_US_SN, 0x1 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000008900000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "mulhl_uu", TILE_OPC_MULHL_UU, 0x1 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000000940000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "mulhl_uu.sn", TILE_OPC_MULHL_UU_SN, 0x1 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000008940000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "mulhla_ss", TILE_OPC_MULHLA_SS, 0x1 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 21, 8, 16 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000000740000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "mulhla_ss.sn", TILE_OPC_MULHLA_SS_SN, 0x1 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 21, 8, 16 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000008740000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "mulhla_su", TILE_OPC_MULHLA_SU, 0x1 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 21, 8, 16 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000000780000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "mulhla_su.sn", TILE_OPC_MULHLA_SU_SN, 0x1 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 21, 8, 16 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000008780000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "mulhla_us", TILE_OPC_MULHLA_US, 0x1 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 21, 8, 16 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x00000000007c0000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "mulhla_us.sn", TILE_OPC_MULHLA_US_SN, 0x1 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 21, 8, 16 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x00000000087c0000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "mulhla_uu", TILE_OPC_MULHLA_UU, 0x1 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 21, 8, 16 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000000800000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "mulhla_uu.sn", TILE_OPC_MULHLA_UU_SN, 0x1 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 21, 8, 16 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000008800000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "mulhlsa_uu", TILE_OPC_MULHLSA_UU, 0x5 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 21, 8, 16 },
+      { 0, },
+      { 31, 12, 18 },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0x80000000780c0000ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000000840000ULL,
+      -1ULL,
+      0x8000000030000000ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "mulhlsa_uu.sn", TILE_OPC_MULHLSA_UU_SN, 0x1 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 21, 8, 16 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000008840000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "mulll_ss", TILE_OPC_MULLL_SS, 0x5 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 0, },
+      { 11, 12, 18 },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0x80000000780c0000ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000000a80000ULL,
+      -1ULL,
+      0x8000000038080000ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "mulll_ss.sn", TILE_OPC_MULLL_SS_SN, 0x1 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000008a80000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "mulll_su", TILE_OPC_MULLL_SU, 0x1 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000000ac0000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "mulll_su.sn", TILE_OPC_MULLL_SU_SN, 0x1 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000008ac0000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "mulll_uu", TILE_OPC_MULLL_UU, 0x5 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 0, },
+      { 11, 12, 18 },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0x80000000780c0000ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000000b00000ULL,
+      -1ULL,
+      0x80000000380c0000ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "mulll_uu.sn", TILE_OPC_MULLL_UU_SN, 0x1 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000008b00000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "mullla_ss", TILE_OPC_MULLLA_SS, 0x5 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 21, 8, 16 },
+      { 0, },
+      { 31, 12, 18 },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0x80000000780c0000ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000000980000ULL,
+      -1ULL,
+      0x8000000040080000ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "mullla_ss.sn", TILE_OPC_MULLLA_SS_SN, 0x1 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 21, 8, 16 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000008980000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "mullla_su", TILE_OPC_MULLLA_SU, 0x1 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 21, 8, 16 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x00000000009c0000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "mullla_su.sn", TILE_OPC_MULLLA_SU_SN, 0x1 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 21, 8, 16 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x00000000089c0000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "mullla_uu", TILE_OPC_MULLLA_UU, 0x5 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 21, 8, 16 },
+      { 0, },
+      { 31, 12, 18 },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0x80000000780c0000ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000000a00000ULL,
+      -1ULL,
+      0x80000000400c0000ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "mullla_uu.sn", TILE_OPC_MULLLA_UU_SN, 0x1 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 21, 8, 16 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000008a00000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "mulllsa_uu", TILE_OPC_MULLLSA_UU, 0x1 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 21, 8, 16 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000000a40000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "mulllsa_uu.sn", TILE_OPC_MULLLSA_UU_SN, 0x1 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 21, 8, 16 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000008a40000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "mvnz", TILE_OPC_MVNZ, 0x5 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 21, 8, 16 },
+      { 0, },
+      { 31, 12, 18 },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0x80000000780c0000ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000000b40000ULL,
+      -1ULL,
+      0x8000000010040000ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "mvnz.sn", TILE_OPC_MVNZ_SN, 0x1 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 21, 8, 16 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000008b40000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "mvz", TILE_OPC_MVZ, 0x5 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 21, 8, 16 },
+      { 0, },
+      { 31, 12, 18 },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0x80000000780c0000ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000000b80000ULL,
+      -1ULL,
+      0x8000000010080000ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "mvz.sn", TILE_OPC_MVZ_SN, 0x1 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 21, 8, 16 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000008b80000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "mz", TILE_OPC_MZ, 0xf /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 11, 12, 18 },
+      { 13, 14, 19 },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x80000000780c0000ULL,
+      0xf806000000000000ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000000c40000ULL,
+      0x082e000000000000ULL,
+      0x80000000100c0000ULL,
+      0x9004000000000000ULL,
+      -1ULL
+    }
+  },
+  { "mz.sn", TILE_OPC_MZ_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000008c40000ULL,
+      0x0c2e000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "mzb", TILE_OPC_MZB, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000000bc0000ULL,
+      0x082a000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "mzb.sn", TILE_OPC_MZB_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000008bc0000ULL,
+      0x0c2a000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "mzh", TILE_OPC_MZH, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000000c00000ULL,
+      0x082c000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "mzh.sn", TILE_OPC_MZH_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000008c00000ULL,
+      0x0c2c000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "nap", TILE_OPC_NAP, 0x2 /* pipes */, 0 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    0, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      {  },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfbfff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x400b800000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "nop", TILE_OPC_NOP, 0xf /* pipes */, 0 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      {  },
+      {  },
+      {  },
+      {  },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x8000000077fff000ULL,
+      0xfbfff80000000000ULL,
+      0x80000000780ff000ULL,
+      0xf807f80000000000ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000070166000ULL,
+      0x400b880000000000ULL,
+      0x80000000680a6000ULL,
+      0xd805180000000000ULL,
+      -1ULL
+    }
+  },
+  { "nor", TILE_OPC_NOR, 0xf /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 11, 12, 18 },
+      { 13, 14, 19 },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x80000000780c0000ULL,
+      0xf806000000000000ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000000c80000ULL,
+      0x0830000000000000ULL,
+      0x8000000018040000ULL,
+      0x9802000000000000ULL,
+      -1ULL
+    }
+  },
+  { "nor.sn", TILE_OPC_NOR_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000008c80000ULL,
+      0x0c30000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "or", TILE_OPC_OR, 0xf /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 11, 12, 18 },
+      { 13, 14, 19 },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x80000000780c0000ULL,
+      0xf806000000000000ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000000cc0000ULL,
+      0x0832000000000000ULL,
+      0x8000000018080000ULL,
+      0x9804000000000000ULL,
+      -1ULL
+    }
+  },
+  { "or.sn", TILE_OPC_OR_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000008cc0000ULL,
+      0x0c32000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "ori", TILE_OPC_ORI, 0xf /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 0 },
+      { 9, 10, 1 },
+      { 11, 12, 2 },
+      { 13, 14, 3 },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0x8000000078000000ULL,
+      0xf800000000000000ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000040800000ULL,
+      0x3058000000000000ULL,
+      0x8000000058000000ULL,
+      0xc800000000000000ULL,
+      -1ULL
+    }
+  },
+  { "ori.sn", TILE_OPC_ORI_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 0 },
+      { 9, 10, 1 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000048800000ULL,
+      0x3458000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "packbs_u", TILE_OPC_PACKBS_U, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x00000000019c0000ULL,
+      0x0892000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "packbs_u.sn", TILE_OPC_PACKBS_U_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x00000000099c0000ULL,
+      0x0c92000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "packhb", TILE_OPC_PACKHB, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000000d00000ULL,
+      0x0834000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "packhb.sn", TILE_OPC_PACKHB_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000008d00000ULL,
+      0x0c34000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "packhs", TILE_OPC_PACKHS, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000001980000ULL,
+      0x0890000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "packhs.sn", TILE_OPC_PACKHS_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000009980000ULL,
+      0x0c90000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "packlb", TILE_OPC_PACKLB, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000000d40000ULL,
+      0x0836000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "packlb.sn", TILE_OPC_PACKLB_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000008d40000ULL,
+      0x0c36000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "pcnt", TILE_OPC_PCNT, 0x5 /* pipes */, 2 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8 },
+      { 0, },
+      { 11, 12 },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffff000ULL,
+      0ULL,
+      0x80000000780ff000ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000070167000ULL,
+      -1ULL,
+      0x80000000680a7000ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "pcnt.sn", TILE_OPC_PCNT_SN, 0x1 /* pipes */, 2 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffff000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000078167000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "rl", TILE_OPC_RL, 0xf /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 11, 12, 18 },
+      { 13, 14, 19 },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x80000000780c0000ULL,
+      0xf806000000000000ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000000d80000ULL,
+      0x0838000000000000ULL,
+      0x8000000020000000ULL,
+      0xa000000000000000ULL,
+      -1ULL
+    }
+  },
+  { "rl.sn", TILE_OPC_RL_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000008d80000ULL,
+      0x0c38000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "rli", TILE_OPC_RLI, 0xf /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 32 },
+      { 9, 10, 33 },
+      { 11, 12, 34 },
+      { 13, 14, 35 },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffe0000ULL,
+      0xffff000000000000ULL,
+      0x80000000780e0000ULL,
+      0xf807000000000000ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000070020000ULL,
+      0x4001000000000000ULL,
+      0x8000000068020000ULL,
+      0xd801000000000000ULL,
+      -1ULL
+    }
+  },
+  { "rli.sn", TILE_OPC_RLI_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 32 },
+      { 9, 10, 33 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffe0000ULL,
+      0xffff000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000078020000ULL,
+      0x4401000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "s1a", TILE_OPC_S1A, 0xf /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 11, 12, 18 },
+      { 13, 14, 19 },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x80000000780c0000ULL,
+      0xf806000000000000ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000000dc0000ULL,
+      0x083a000000000000ULL,
+      0x8000000008040000ULL,
+      0x8802000000000000ULL,
+      -1ULL
+    }
+  },
+  { "s1a.sn", TILE_OPC_S1A_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000008dc0000ULL,
+      0x0c3a000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "s2a", TILE_OPC_S2A, 0xf /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 11, 12, 18 },
+      { 13, 14, 19 },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x80000000780c0000ULL,
+      0xf806000000000000ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000000e00000ULL,
+      0x083c000000000000ULL,
+      0x8000000008080000ULL,
+      0x8804000000000000ULL,
+      -1ULL
+    }
+  },
+  { "s2a.sn", TILE_OPC_S2A_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000008e00000ULL,
+      0x0c3c000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "s3a", TILE_OPC_S3A, 0xf /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 11, 12, 18 },
+      { 13, 14, 19 },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x80000000780c0000ULL,
+      0xf806000000000000ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000000e40000ULL,
+      0x083e000000000000ULL,
+      0x8000000030040000ULL,
+      0xb002000000000000ULL,
+      -1ULL
+    }
+  },
+  { "s3a.sn", TILE_OPC_S3A_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000008e40000ULL,
+      0x0c3e000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "sadab_u", TILE_OPC_SADAB_U, 0x1 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 21, 8, 16 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000000e80000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "sadab_u.sn", TILE_OPC_SADAB_U_SN, 0x1 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 21, 8, 16 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000008e80000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "sadah", TILE_OPC_SADAH, 0x1 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 21, 8, 16 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000000ec0000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "sadah.sn", TILE_OPC_SADAH_SN, 0x1 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 21, 8, 16 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000008ec0000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "sadah_u", TILE_OPC_SADAH_U, 0x1 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 21, 8, 16 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000000f00000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "sadah_u.sn", TILE_OPC_SADAH_U_SN, 0x1 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 21, 8, 16 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000008f00000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "sadb_u", TILE_OPC_SADB_U, 0x1 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000000f40000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "sadb_u.sn", TILE_OPC_SADB_U_SN, 0x1 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000008f40000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "sadh", TILE_OPC_SADH, 0x1 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000000f80000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "sadh.sn", TILE_OPC_SADH_SN, 0x1 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000008f80000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "sadh_u", TILE_OPC_SADH_U, 0x1 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000000fc0000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "sadh_u.sn", TILE_OPC_SADH_U_SN, 0x1 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000008fc0000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "sb", TILE_OPC_SB, 0x12 /* pipes */, 2 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 10, 17 },
+      { 0, },
+      { 0, },
+      { 15, 36 }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfbfe000000000000ULL,
+      0ULL,
+      0ULL,
+      0x8700000000000000ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x0840000000000000ULL,
+      -1ULL,
+      -1ULL,
+      0x8500000000000000ULL
+    }
+  },
+  { "sbadd", TILE_OPC_SBADD, 0x2 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 24, 17, 37 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfbf8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x30e0000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "seq", TILE_OPC_SEQ, 0xf /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 11, 12, 18 },
+      { 13, 14, 19 },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x80000000780c0000ULL,
+      0xf806000000000000ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000001080000ULL,
+      0x0846000000000000ULL,
+      0x8000000030080000ULL,
+      0xb004000000000000ULL,
+      -1ULL
+    }
+  },
+  { "seq.sn", TILE_OPC_SEQ_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000009080000ULL,
+      0x0c46000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "seqb", TILE_OPC_SEQB, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000001000000ULL,
+      0x0842000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "seqb.sn", TILE_OPC_SEQB_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000009000000ULL,
+      0x0c42000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "seqh", TILE_OPC_SEQH, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000001040000ULL,
+      0x0844000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "seqh.sn", TILE_OPC_SEQH_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000009040000ULL,
+      0x0c44000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "seqi", TILE_OPC_SEQI, 0xf /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 0 },
+      { 9, 10, 1 },
+      { 11, 12, 2 },
+      { 13, 14, 3 },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0x8000000078000000ULL,
+      0xf800000000000000ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000040b00000ULL,
+      0x3070000000000000ULL,
+      0x8000000060000000ULL,
+      0xd000000000000000ULL,
+      -1ULL
+    }
+  },
+  { "seqi.sn", TILE_OPC_SEQI_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 0 },
+      { 9, 10, 1 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000048b00000ULL,
+      0x3470000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "seqib", TILE_OPC_SEQIB, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 0 },
+      { 9, 10, 1 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000040900000ULL,
+      0x3060000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "seqib.sn", TILE_OPC_SEQIB_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 0 },
+      { 9, 10, 1 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000048900000ULL,
+      0x3460000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "seqih", TILE_OPC_SEQIH, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 0 },
+      { 9, 10, 1 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000040a00000ULL,
+      0x3068000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "seqih.sn", TILE_OPC_SEQIH_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 0 },
+      { 9, 10, 1 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000048a00000ULL,
+      0x3468000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "sh", TILE_OPC_SH, 0x12 /* pipes */, 2 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 10, 17 },
+      { 0, },
+      { 0, },
+      { 15, 36 }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfbfe000000000000ULL,
+      0ULL,
+      0ULL,
+      0x8700000000000000ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x0854000000000000ULL,
+      -1ULL,
+      -1ULL,
+      0x8600000000000000ULL
+    }
+  },
+  { "shadd", TILE_OPC_SHADD, 0x2 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 24, 17, 37 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfbf8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x30e8000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "shl", TILE_OPC_SHL, 0xf /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 11, 12, 18 },
+      { 13, 14, 19 },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x80000000780c0000ULL,
+      0xf806000000000000ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000001140000ULL,
+      0x084c000000000000ULL,
+      0x8000000020040000ULL,
+      0xa002000000000000ULL,
+      -1ULL
+    }
+  },
+  { "shl.sn", TILE_OPC_SHL_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000009140000ULL,
+      0x0c4c000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "shlb", TILE_OPC_SHLB, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x00000000010c0000ULL,
+      0x0848000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "shlb.sn", TILE_OPC_SHLB_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x00000000090c0000ULL,
+      0x0c48000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "shlh", TILE_OPC_SHLH, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000001100000ULL,
+      0x084a000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "shlh.sn", TILE_OPC_SHLH_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000009100000ULL,
+      0x0c4a000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "shli", TILE_OPC_SHLI, 0xf /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 32 },
+      { 9, 10, 33 },
+      { 11, 12, 34 },
+      { 13, 14, 35 },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffe0000ULL,
+      0xffff000000000000ULL,
+      0x80000000780e0000ULL,
+      0xf807000000000000ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000070080000ULL,
+      0x4004000000000000ULL,
+      0x8000000068040000ULL,
+      0xd802000000000000ULL,
+      -1ULL
+    }
+  },
+  { "shli.sn", TILE_OPC_SHLI_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 32 },
+      { 9, 10, 33 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffe0000ULL,
+      0xffff000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000078080000ULL,
+      0x4404000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "shlib", TILE_OPC_SHLIB, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 32 },
+      { 9, 10, 33 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffe0000ULL,
+      0xffff000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000070040000ULL,
+      0x4002000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "shlib.sn", TILE_OPC_SHLIB_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 32 },
+      { 9, 10, 33 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffe0000ULL,
+      0xffff000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000078040000ULL,
+      0x4402000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "shlih", TILE_OPC_SHLIH, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 32 },
+      { 9, 10, 33 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffe0000ULL,
+      0xffff000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000070060000ULL,
+      0x4003000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "shlih.sn", TILE_OPC_SHLIH_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 32 },
+      { 9, 10, 33 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffe0000ULL,
+      0xffff000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000078060000ULL,
+      0x4403000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "shr", TILE_OPC_SHR, 0xf /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 11, 12, 18 },
+      { 13, 14, 19 },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x80000000780c0000ULL,
+      0xf806000000000000ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000001200000ULL,
+      0x0852000000000000ULL,
+      0x8000000020080000ULL,
+      0xa004000000000000ULL,
+      -1ULL
+    }
+  },
+  { "shr.sn", TILE_OPC_SHR_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000009200000ULL,
+      0x0c52000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "shrb", TILE_OPC_SHRB, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000001180000ULL,
+      0x084e000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "shrb.sn", TILE_OPC_SHRB_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000009180000ULL,
+      0x0c4e000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "shrh", TILE_OPC_SHRH, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x00000000011c0000ULL,
+      0x0850000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "shrh.sn", TILE_OPC_SHRH_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x00000000091c0000ULL,
+      0x0c50000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "shri", TILE_OPC_SHRI, 0xf /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 32 },
+      { 9, 10, 33 },
+      { 11, 12, 34 },
+      { 13, 14, 35 },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffe0000ULL,
+      0xffff000000000000ULL,
+      0x80000000780e0000ULL,
+      0xf807000000000000ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x00000000700e0000ULL,
+      0x4007000000000000ULL,
+      0x8000000068060000ULL,
+      0xd803000000000000ULL,
+      -1ULL
+    }
+  },
+  { "shri.sn", TILE_OPC_SHRI_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 32 },
+      { 9, 10, 33 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffe0000ULL,
+      0xffff000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x00000000780e0000ULL,
+      0x4407000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "shrib", TILE_OPC_SHRIB, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 32 },
+      { 9, 10, 33 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffe0000ULL,
+      0xffff000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x00000000700a0000ULL,
+      0x4005000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "shrib.sn", TILE_OPC_SHRIB_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 32 },
+      { 9, 10, 33 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffe0000ULL,
+      0xffff000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x00000000780a0000ULL,
+      0x4405000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "shrih", TILE_OPC_SHRIH, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 32 },
+      { 9, 10, 33 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffe0000ULL,
+      0xffff000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x00000000700c0000ULL,
+      0x4006000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "shrih.sn", TILE_OPC_SHRIH_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 32 },
+      { 9, 10, 33 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffe0000ULL,
+      0xffff000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x00000000780c0000ULL,
+      0x4406000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "slt", TILE_OPC_SLT, 0xf /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 11, 12, 18 },
+      { 13, 14, 19 },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x80000000780c0000ULL,
+      0xf806000000000000ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x00000000014c0000ULL,
+      0x086a000000000000ULL,
+      0x8000000028080000ULL,
+      0xa804000000000000ULL,
+      -1ULL
+    }
+  },
+  { "slt.sn", TILE_OPC_SLT_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x00000000094c0000ULL,
+      0x0c6a000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "slt_u", TILE_OPC_SLT_U, 0xf /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 11, 12, 18 },
+      { 13, 14, 19 },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x80000000780c0000ULL,
+      0xf806000000000000ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000001500000ULL,
+      0x086c000000000000ULL,
+      0x80000000280c0000ULL,
+      0xa806000000000000ULL,
+      -1ULL
+    }
+  },
+  { "slt_u.sn", TILE_OPC_SLT_U_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000009500000ULL,
+      0x0c6c000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "sltb", TILE_OPC_SLTB, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000001240000ULL,
+      0x0856000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "sltb.sn", TILE_OPC_SLTB_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000009240000ULL,
+      0x0c56000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "sltb_u", TILE_OPC_SLTB_U, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000001280000ULL,
+      0x0858000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "sltb_u.sn", TILE_OPC_SLTB_U_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000009280000ULL,
+      0x0c58000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "slte", TILE_OPC_SLTE, 0xf /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 11, 12, 18 },
+      { 13, 14, 19 },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x80000000780c0000ULL,
+      0xf806000000000000ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x00000000013c0000ULL,
+      0x0862000000000000ULL,
+      0x8000000028000000ULL,
+      0xa800000000000000ULL,
+      -1ULL
+    }
+  },
+  { "slte.sn", TILE_OPC_SLTE_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x00000000093c0000ULL,
+      0x0c62000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "slte_u", TILE_OPC_SLTE_U, 0xf /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 11, 12, 18 },
+      { 13, 14, 19 },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x80000000780c0000ULL,
+      0xf806000000000000ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000001400000ULL,
+      0x0864000000000000ULL,
+      0x8000000028040000ULL,
+      0xa802000000000000ULL,
+      -1ULL
+    }
+  },
+  { "slte_u.sn", TILE_OPC_SLTE_U_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000009400000ULL,
+      0x0c64000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "slteb", TILE_OPC_SLTEB, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x00000000012c0000ULL,
+      0x085a000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "slteb.sn", TILE_OPC_SLTEB_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x00000000092c0000ULL,
+      0x0c5a000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "slteb_u", TILE_OPC_SLTEB_U, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000001300000ULL,
+      0x085c000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "slteb_u.sn", TILE_OPC_SLTEB_U_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000009300000ULL,
+      0x0c5c000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "slteh", TILE_OPC_SLTEH, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000001340000ULL,
+      0x085e000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "slteh.sn", TILE_OPC_SLTEH_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000009340000ULL,
+      0x0c5e000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "slteh_u", TILE_OPC_SLTEH_U, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000001380000ULL,
+      0x0860000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "slteh_u.sn", TILE_OPC_SLTEH_U_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000009380000ULL,
+      0x0c60000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "slth", TILE_OPC_SLTH, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000001440000ULL,
+      0x0866000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "slth.sn", TILE_OPC_SLTH_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000009440000ULL,
+      0x0c66000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "slth_u", TILE_OPC_SLTH_U, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000001480000ULL,
+      0x0868000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "slth_u.sn", TILE_OPC_SLTH_U_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000009480000ULL,
+      0x0c68000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "slti", TILE_OPC_SLTI, 0xf /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 0 },
+      { 9, 10, 1 },
+      { 11, 12, 2 },
+      { 13, 14, 3 },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0x8000000078000000ULL,
+      0xf800000000000000ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000041000000ULL,
+      0x3098000000000000ULL,
+      0x8000000070000000ULL,
+      0xe000000000000000ULL,
+      -1ULL
+    }
+  },
+  { "slti.sn", TILE_OPC_SLTI_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 0 },
+      { 9, 10, 1 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000049000000ULL,
+      0x3498000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "slti_u", TILE_OPC_SLTI_U, 0xf /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 0 },
+      { 9, 10, 1 },
+      { 11, 12, 2 },
+      { 13, 14, 3 },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0x8000000078000000ULL,
+      0xf800000000000000ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000041100000ULL,
+      0x30a0000000000000ULL,
+      0x8000000078000000ULL,
+      0xe800000000000000ULL,
+      -1ULL
+    }
+  },
+  { "slti_u.sn", TILE_OPC_SLTI_U_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 0 },
+      { 9, 10, 1 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000049100000ULL,
+      0x34a0000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "sltib", TILE_OPC_SLTIB, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 0 },
+      { 9, 10, 1 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000040c00000ULL,
+      0x3078000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "sltib.sn", TILE_OPC_SLTIB_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 0 },
+      { 9, 10, 1 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000048c00000ULL,
+      0x3478000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "sltib_u", TILE_OPC_SLTIB_U, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 0 },
+      { 9, 10, 1 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000040d00000ULL,
+      0x3080000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "sltib_u.sn", TILE_OPC_SLTIB_U_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 0 },
+      { 9, 10, 1 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000048d00000ULL,
+      0x3480000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "sltih", TILE_OPC_SLTIH, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 0 },
+      { 9, 10, 1 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000040e00000ULL,
+      0x3088000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "sltih.sn", TILE_OPC_SLTIH_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 0 },
+      { 9, 10, 1 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000048e00000ULL,
+      0x3488000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "sltih_u", TILE_OPC_SLTIH_U, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 0 },
+      { 9, 10, 1 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000040f00000ULL,
+      0x3090000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "sltih_u.sn", TILE_OPC_SLTIH_U_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 0 },
+      { 9, 10, 1 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000048f00000ULL,
+      0x3490000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "sne", TILE_OPC_SNE, 0xf /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 11, 12, 18 },
+      { 13, 14, 19 },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x80000000780c0000ULL,
+      0xf806000000000000ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x00000000015c0000ULL,
+      0x0872000000000000ULL,
+      0x80000000300c0000ULL,
+      0xb006000000000000ULL,
+      -1ULL
+    }
+  },
+  { "sne.sn", TILE_OPC_SNE_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x00000000095c0000ULL,
+      0x0c72000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "sneb", TILE_OPC_SNEB, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000001540000ULL,
+      0x086e000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "sneb.sn", TILE_OPC_SNEB_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000009540000ULL,
+      0x0c6e000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "sneh", TILE_OPC_SNEH, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000001580000ULL,
+      0x0870000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "sneh.sn", TILE_OPC_SNEH_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000009580000ULL,
+      0x0c70000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "sra", TILE_OPC_SRA, 0xf /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 11, 12, 18 },
+      { 13, 14, 19 },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x80000000780c0000ULL,
+      0xf806000000000000ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000001680000ULL,
+      0x0878000000000000ULL,
+      0x80000000200c0000ULL,
+      0xa006000000000000ULL,
+      -1ULL
+    }
+  },
+  { "sra.sn", TILE_OPC_SRA_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000009680000ULL,
+      0x0c78000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "srab", TILE_OPC_SRAB, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000001600000ULL,
+      0x0874000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "srab.sn", TILE_OPC_SRAB_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000009600000ULL,
+      0x0c74000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "srah", TILE_OPC_SRAH, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000001640000ULL,
+      0x0876000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "srah.sn", TILE_OPC_SRAH_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000009640000ULL,
+      0x0c76000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "srai", TILE_OPC_SRAI, 0xf /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 32 },
+      { 9, 10, 33 },
+      { 11, 12, 34 },
+      { 13, 14, 35 },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffe0000ULL,
+      0xffff000000000000ULL,
+      0x80000000780e0000ULL,
+      0xf807000000000000ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000070140000ULL,
+      0x400a000000000000ULL,
+      0x8000000068080000ULL,
+      0xd804000000000000ULL,
+      -1ULL
+    }
+  },
+  { "srai.sn", TILE_OPC_SRAI_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 32 },
+      { 9, 10, 33 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffe0000ULL,
+      0xffff000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000078140000ULL,
+      0x440a000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "sraib", TILE_OPC_SRAIB, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 32 },
+      { 9, 10, 33 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffe0000ULL,
+      0xffff000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000070100000ULL,
+      0x4008000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "sraib.sn", TILE_OPC_SRAIB_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 32 },
+      { 9, 10, 33 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffe0000ULL,
+      0xffff000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000078100000ULL,
+      0x4408000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "sraih", TILE_OPC_SRAIH, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 32 },
+      { 9, 10, 33 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffe0000ULL,
+      0xffff000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000070120000ULL,
+      0x4009000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "sraih.sn", TILE_OPC_SRAIH_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 32 },
+      { 9, 10, 33 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffe0000ULL,
+      0xffff000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000078120000ULL,
+      0x4409000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "sub", TILE_OPC_SUB, 0xf /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 11, 12, 18 },
+      { 13, 14, 19 },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x80000000780c0000ULL,
+      0xf806000000000000ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000001740000ULL,
+      0x087e000000000000ULL,
+      0x80000000080c0000ULL,
+      0x8806000000000000ULL,
+      -1ULL
+    }
+  },
+  { "sub.sn", TILE_OPC_SUB_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000009740000ULL,
+      0x0c7e000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "subb", TILE_OPC_SUBB, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x00000000016c0000ULL,
+      0x087a000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "subb.sn", TILE_OPC_SUBB_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x00000000096c0000ULL,
+      0x0c7a000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "subbs_u", TILE_OPC_SUBBS_U, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000001900000ULL,
+      0x088c000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "subbs_u.sn", TILE_OPC_SUBBS_U_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000009900000ULL,
+      0x0c8c000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "subh", TILE_OPC_SUBH, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000001700000ULL,
+      0x087c000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "subh.sn", TILE_OPC_SUBH_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000009700000ULL,
+      0x0c7c000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "subhs", TILE_OPC_SUBHS, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000001940000ULL,
+      0x088e000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "subhs.sn", TILE_OPC_SUBHS_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000009940000ULL,
+      0x0c8e000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "subs", TILE_OPC_SUBS, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000001840000ULL,
+      0x0886000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "subs.sn", TILE_OPC_SUBS_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000009840000ULL,
+      0x0c86000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "sw", TILE_OPC_SW, 0x12 /* pipes */, 2 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 10, 17 },
+      { 0, },
+      { 0, },
+      { 15, 36 }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfbfe000000000000ULL,
+      0ULL,
+      0ULL,
+      0x8700000000000000ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x0880000000000000ULL,
+      -1ULL,
+      -1ULL,
+      0x8700000000000000ULL
+    }
+  },
+  { "swadd", TILE_OPC_SWADD, 0x2 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 24, 17, 37 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfbf8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x30f0000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "swint0", TILE_OPC_SWINT0, 0x2 /* pipes */, 0 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    0, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      {  },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfbfff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x400b900000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "swint1", TILE_OPC_SWINT1, 0x2 /* pipes */, 0 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    0, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      {  },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfbfff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x400b980000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "swint2", TILE_OPC_SWINT2, 0x2 /* pipes */, 0 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    0, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      {  },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfbfff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x400ba00000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "swint3", TILE_OPC_SWINT3, 0x2 /* pipes */, 0 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    0, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      {  },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfbfff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x400ba80000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "tblidxb0", TILE_OPC_TBLIDXB0, 0x5 /* pipes */, 2 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 21, 8 },
+      { 0, },
+      { 31, 12 },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffff000ULL,
+      0ULL,
+      0x80000000780ff000ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000070168000ULL,
+      -1ULL,
+      0x80000000680a8000ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "tblidxb0.sn", TILE_OPC_TBLIDXB0_SN, 0x1 /* pipes */, 2 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 21, 8 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffff000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000078168000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "tblidxb1", TILE_OPC_TBLIDXB1, 0x5 /* pipes */, 2 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 21, 8 },
+      { 0, },
+      { 31, 12 },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffff000ULL,
+      0ULL,
+      0x80000000780ff000ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000070169000ULL,
+      -1ULL,
+      0x80000000680a9000ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "tblidxb1.sn", TILE_OPC_TBLIDXB1_SN, 0x1 /* pipes */, 2 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 21, 8 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffff000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000078169000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "tblidxb2", TILE_OPC_TBLIDXB2, 0x5 /* pipes */, 2 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 21, 8 },
+      { 0, },
+      { 31, 12 },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffff000ULL,
+      0ULL,
+      0x80000000780ff000ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x000000007016a000ULL,
+      -1ULL,
+      0x80000000680aa000ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "tblidxb2.sn", TILE_OPC_TBLIDXB2_SN, 0x1 /* pipes */, 2 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 21, 8 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffff000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x000000007816a000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "tblidxb3", TILE_OPC_TBLIDXB3, 0x5 /* pipes */, 2 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 21, 8 },
+      { 0, },
+      { 31, 12 },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffff000ULL,
+      0ULL,
+      0x80000000780ff000ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x000000007016b000ULL,
+      -1ULL,
+      0x80000000680ab000ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "tblidxb3.sn", TILE_OPC_TBLIDXB3_SN, 0x1 /* pipes */, 2 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 21, 8 },
+      { 0, },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffff000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x000000007816b000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "tns", TILE_OPC_TNS, 0x2 /* pipes */, 2 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 9, 10 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x400bb00000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "tns.sn", TILE_OPC_TNS_SN, 0x2 /* pipes */, 2 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 9, 10 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x440bb00000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "wh64", TILE_OPC_WH64, 0x2 /* pipes */, 1 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 0, },
+      { 10 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0ULL,
+      0xfbfff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      -1ULL,
+      0x400bb80000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "xor", TILE_OPC_XOR, 0xf /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 11, 12, 18 },
+      { 13, 14, 19 },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x80000000780c0000ULL,
+      0xf806000000000000ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000001780000ULL,
+      0x0882000000000000ULL,
+      0x80000000180c0000ULL,
+      0x9806000000000000ULL,
+      -1ULL
+    }
+  },
+  { "xor.sn", TILE_OPC_XOR_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 16 },
+      { 9, 10, 17 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000009780000ULL,
+      0x0c82000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "xori", TILE_OPC_XORI, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_ZERO, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 0 },
+      { 9, 10, 1 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000050200000ULL,
+      0x30a8000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { "xori.sn", TILE_OPC_XORI_SN, 0x3 /* pipes */, 3 /* num_operands */,
+    TREG_SN, /* implicitly_written_register */
+    1, /* can_bundle */
+    {
+      /* operands */
+      { 7, 8, 0 },
+      { 9, 10, 1 },
+      { 0, },
+      { 0, },
+      { 0, }
+    },
+    {
+      /* fixed_bit_masks */
+      0x800000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      /* fixed_bit_values */
+      0x0000000058200000ULL,
+      0x34a8000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+  },
+  { 0, TILE_OPC_NONE, 0, 0, 0, TREG_ZERO, { { 0, } }, { 0, }, { 0, }
+  }
+};
+#define BITFIELD(start, size) ((start) | (((1 << (size)) - 1) << 6))
+#define CHILD(array_index) (TILE_OPC_NONE + (array_index))
+
+static const unsigned short decode_X0_fsm[1153] =
+{
+  BITFIELD(22, 9) /* index 0 */,
+  CHILD(513), CHILD(530), CHILD(547), CHILD(564), CHILD(596), CHILD(613),
+  CHILD(630), TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, CHILD(663), CHILD(680), CHILD(697), CHILD(714), CHILD(746),
+  CHILD(763), CHILD(780), TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, CHILD(813), CHILD(813), CHILD(813),
+  CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813),
+  CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813),
+  CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813),
+  CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813),
+  CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813),
+  CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813),
+  CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813),
+  CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813),
+  CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813),
+  CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813),
+  CHILD(813), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828),
+  CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828),
+  CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828),
+  CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828),
+  CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828),
+  CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828),
+  CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828),
+  CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828),
+  CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828),
+  CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828),
+  CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(843),
+  CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843),
+  CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843),
+  CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843),
+  CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843),
+  CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843),
+  CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843),
+  CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843),
+  CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843),
+  CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843),
+  CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843),
+  CHILD(843), CHILD(843), CHILD(843), CHILD(873), CHILD(878), CHILD(883),
+  CHILD(903), CHILD(908), TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, CHILD(913),
+  CHILD(918), CHILD(923), CHILD(943), CHILD(948), TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, CHILD(953), TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, CHILD(988), TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM,
+  TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM,
+  TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM,
+  TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM,
+  TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM,
+  TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM,
+  TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM,
+  TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM,
+  TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM,
+  TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM,
+  TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM,
+  TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM,
+  TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, CHILD(993),
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, CHILD(1076), TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  BITFIELD(18, 4) /* index 513 */,
+  TILE_OPC_NONE, TILE_OPC_ADDB, TILE_OPC_ADDH, TILE_OPC_ADD,
+  TILE_OPC_ADIFFB_U, TILE_OPC_ADIFFH, TILE_OPC_AND, TILE_OPC_AVGB_U,
+  TILE_OPC_AVGH, TILE_OPC_CRC32_32, TILE_OPC_CRC32_8, TILE_OPC_INTHB,
+  TILE_OPC_INTHH, TILE_OPC_INTLB, TILE_OPC_INTLH, TILE_OPC_MAXB_U,
+  BITFIELD(18, 4) /* index 530 */,
+  TILE_OPC_MAXH, TILE_OPC_MINB_U, TILE_OPC_MINH, TILE_OPC_MNZB, TILE_OPC_MNZH,
+  TILE_OPC_MNZ, TILE_OPC_MULHHA_SS, TILE_OPC_MULHHA_SU, TILE_OPC_MULHHA_UU,
+  TILE_OPC_MULHHSA_UU, TILE_OPC_MULHH_SS, TILE_OPC_MULHH_SU,
+  TILE_OPC_MULHH_UU, TILE_OPC_MULHLA_SS, TILE_OPC_MULHLA_SU,
+  TILE_OPC_MULHLA_US,
+  BITFIELD(18, 4) /* index 547 */,
+  TILE_OPC_MULHLA_UU, TILE_OPC_MULHLSA_UU, TILE_OPC_MULHL_SS,
+  TILE_OPC_MULHL_SU, TILE_OPC_MULHL_US, TILE_OPC_MULHL_UU, TILE_OPC_MULLLA_SS,
+  TILE_OPC_MULLLA_SU, TILE_OPC_MULLLA_UU, TILE_OPC_MULLLSA_UU,
+  TILE_OPC_MULLL_SS, TILE_OPC_MULLL_SU, TILE_OPC_MULLL_UU, TILE_OPC_MVNZ,
+  TILE_OPC_MVZ, TILE_OPC_MZB,
+  BITFIELD(18, 4) /* index 564 */,
+  TILE_OPC_MZH, TILE_OPC_MZ, TILE_OPC_NOR, CHILD(581), TILE_OPC_PACKHB,
+  TILE_OPC_PACKLB, TILE_OPC_RL, TILE_OPC_S1A, TILE_OPC_S2A, TILE_OPC_S3A,
+  TILE_OPC_SADAB_U, TILE_OPC_SADAH, TILE_OPC_SADAH_U, TILE_OPC_SADB_U,
+  TILE_OPC_SADH, TILE_OPC_SADH_U,
+  BITFIELD(12, 2) /* index 581 */,
+  TILE_OPC_OR, TILE_OPC_OR, TILE_OPC_OR, CHILD(586),
+  BITFIELD(14, 2) /* index 586 */,
+  TILE_OPC_OR, TILE_OPC_OR, TILE_OPC_OR, CHILD(591),
+  BITFIELD(16, 2) /* index 591 */,
+  TILE_OPC_OR, TILE_OPC_OR, TILE_OPC_OR, TILE_OPC_MOVE,
+  BITFIELD(18, 4) /* index 596 */,
+  TILE_OPC_SEQB, TILE_OPC_SEQH, TILE_OPC_SEQ, TILE_OPC_SHLB, TILE_OPC_SHLH,
+  TILE_OPC_SHL, TILE_OPC_SHRB, TILE_OPC_SHRH, TILE_OPC_SHR, TILE_OPC_SLTB,
+  TILE_OPC_SLTB_U, TILE_OPC_SLTEB, TILE_OPC_SLTEB_U, TILE_OPC_SLTEH,
+  TILE_OPC_SLTEH_U, TILE_OPC_SLTE,
+  BITFIELD(18, 4) /* index 613 */,
+  TILE_OPC_SLTE_U, TILE_OPC_SLTH, TILE_OPC_SLTH_U, TILE_OPC_SLT,
+  TILE_OPC_SLT_U, TILE_OPC_SNEB, TILE_OPC_SNEH, TILE_OPC_SNE, TILE_OPC_SRAB,
+  TILE_OPC_SRAH, TILE_OPC_SRA, TILE_OPC_SUBB, TILE_OPC_SUBH, TILE_OPC_SUB,
+  TILE_OPC_XOR, TILE_OPC_DWORD_ALIGN,
+  BITFIELD(18, 3) /* index 630 */,
+  CHILD(639), CHILD(642), CHILD(645), CHILD(648), CHILD(651), CHILD(654),
+  CHILD(657), CHILD(660),
+  BITFIELD(21, 1) /* index 639 */,
+  TILE_OPC_ADDS, TILE_OPC_NONE,
+  BITFIELD(21, 1) /* index 642 */,
+  TILE_OPC_SUBS, TILE_OPC_NONE,
+  BITFIELD(21, 1) /* index 645 */,
+  TILE_OPC_ADDBS_U, TILE_OPC_NONE,
+  BITFIELD(21, 1) /* index 648 */,
+  TILE_OPC_ADDHS, TILE_OPC_NONE,
+  BITFIELD(21, 1) /* index 651 */,
+  TILE_OPC_SUBBS_U, TILE_OPC_NONE,
+  BITFIELD(21, 1) /* index 654 */,
+  TILE_OPC_SUBHS, TILE_OPC_NONE,
+  BITFIELD(21, 1) /* index 657 */,
+  TILE_OPC_PACKHS, TILE_OPC_NONE,
+  BITFIELD(21, 1) /* index 660 */,
+  TILE_OPC_PACKBS_U, TILE_OPC_NONE,
+  BITFIELD(18, 4) /* index 663 */,
+  TILE_OPC_NONE, TILE_OPC_ADDB_SN, TILE_OPC_ADDH_SN, TILE_OPC_ADD_SN,
+  TILE_OPC_ADIFFB_U_SN, TILE_OPC_ADIFFH_SN, TILE_OPC_AND_SN,
+  TILE_OPC_AVGB_U_SN, TILE_OPC_AVGH_SN, TILE_OPC_CRC32_32_SN,
+  TILE_OPC_CRC32_8_SN, TILE_OPC_INTHB_SN, TILE_OPC_INTHH_SN,
+  TILE_OPC_INTLB_SN, TILE_OPC_INTLH_SN, TILE_OPC_MAXB_U_SN,
+  BITFIELD(18, 4) /* index 680 */,
+  TILE_OPC_MAXH_SN, TILE_OPC_MINB_U_SN, TILE_OPC_MINH_SN, TILE_OPC_MNZB_SN,
+  TILE_OPC_MNZH_SN, TILE_OPC_MNZ_SN, TILE_OPC_MULHHA_SS_SN,
+  TILE_OPC_MULHHA_SU_SN, TILE_OPC_MULHHA_UU_SN, TILE_OPC_MULHHSA_UU_SN,
+  TILE_OPC_MULHH_SS_SN, TILE_OPC_MULHH_SU_SN, TILE_OPC_MULHH_UU_SN,
+  TILE_OPC_MULHLA_SS_SN, TILE_OPC_MULHLA_SU_SN, TILE_OPC_MULHLA_US_SN,
+  BITFIELD(18, 4) /* index 697 */,
+  TILE_OPC_MULHLA_UU_SN, TILE_OPC_MULHLSA_UU_SN, TILE_OPC_MULHL_SS_SN,
+  TILE_OPC_MULHL_SU_SN, TILE_OPC_MULHL_US_SN, TILE_OPC_MULHL_UU_SN,
+  TILE_OPC_MULLLA_SS_SN, TILE_OPC_MULLLA_SU_SN, TILE_OPC_MULLLA_UU_SN,
+  TILE_OPC_MULLLSA_UU_SN, TILE_OPC_MULLL_SS_SN, TILE_OPC_MULLL_SU_SN,
+  TILE_OPC_MULLL_UU_SN, TILE_OPC_MVNZ_SN, TILE_OPC_MVZ_SN, TILE_OPC_MZB_SN,
+  BITFIELD(18, 4) /* index 714 */,
+  TILE_OPC_MZH_SN, TILE_OPC_MZ_SN, TILE_OPC_NOR_SN, CHILD(731),
+  TILE_OPC_PACKHB_SN, TILE_OPC_PACKLB_SN, TILE_OPC_RL_SN, TILE_OPC_S1A_SN,
+  TILE_OPC_S2A_SN, TILE_OPC_S3A_SN, TILE_OPC_SADAB_U_SN, TILE_OPC_SADAH_SN,
+  TILE_OPC_SADAH_U_SN, TILE_OPC_SADB_U_SN, TILE_OPC_SADH_SN,
+  TILE_OPC_SADH_U_SN,
+  BITFIELD(12, 2) /* index 731 */,
+  TILE_OPC_OR_SN, TILE_OPC_OR_SN, TILE_OPC_OR_SN, CHILD(736),
+  BITFIELD(14, 2) /* index 736 */,
+  TILE_OPC_OR_SN, TILE_OPC_OR_SN, TILE_OPC_OR_SN, CHILD(741),
+  BITFIELD(16, 2) /* index 741 */,
+  TILE_OPC_OR_SN, TILE_OPC_OR_SN, TILE_OPC_OR_SN, TILE_OPC_MOVE_SN,
+  BITFIELD(18, 4) /* index 746 */,
+  TILE_OPC_SEQB_SN, TILE_OPC_SEQH_SN, TILE_OPC_SEQ_SN, TILE_OPC_SHLB_SN,
+  TILE_OPC_SHLH_SN, TILE_OPC_SHL_SN, TILE_OPC_SHRB_SN, TILE_OPC_SHRH_SN,
+  TILE_OPC_SHR_SN, TILE_OPC_SLTB_SN, TILE_OPC_SLTB_U_SN, TILE_OPC_SLTEB_SN,
+  TILE_OPC_SLTEB_U_SN, TILE_OPC_SLTEH_SN, TILE_OPC_SLTEH_U_SN,
+  TILE_OPC_SLTE_SN,
+  BITFIELD(18, 4) /* index 763 */,
+  TILE_OPC_SLTE_U_SN, TILE_OPC_SLTH_SN, TILE_OPC_SLTH_U_SN, TILE_OPC_SLT_SN,
+  TILE_OPC_SLT_U_SN, TILE_OPC_SNEB_SN, TILE_OPC_SNEH_SN, TILE_OPC_SNE_SN,
+  TILE_OPC_SRAB_SN, TILE_OPC_SRAH_SN, TILE_OPC_SRA_SN, TILE_OPC_SUBB_SN,
+  TILE_OPC_SUBH_SN, TILE_OPC_SUB_SN, TILE_OPC_XOR_SN, TILE_OPC_DWORD_ALIGN_SN,
+  BITFIELD(18, 3) /* index 780 */,
+  CHILD(789), CHILD(792), CHILD(795), CHILD(798), CHILD(801), CHILD(804),
+  CHILD(807), CHILD(810),
+  BITFIELD(21, 1) /* index 789 */,
+  TILE_OPC_ADDS_SN, TILE_OPC_NONE,
+  BITFIELD(21, 1) /* index 792 */,
+  TILE_OPC_SUBS_SN, TILE_OPC_NONE,
+  BITFIELD(21, 1) /* index 795 */,
+  TILE_OPC_ADDBS_U_SN, TILE_OPC_NONE,
+  BITFIELD(21, 1) /* index 798 */,
+  TILE_OPC_ADDHS_SN, TILE_OPC_NONE,
+  BITFIELD(21, 1) /* index 801 */,
+  TILE_OPC_SUBBS_U_SN, TILE_OPC_NONE,
+  BITFIELD(21, 1) /* index 804 */,
+  TILE_OPC_SUBHS_SN, TILE_OPC_NONE,
+  BITFIELD(21, 1) /* index 807 */,
+  TILE_OPC_PACKHS_SN, TILE_OPC_NONE,
+  BITFIELD(21, 1) /* index 810 */,
+  TILE_OPC_PACKBS_U_SN, TILE_OPC_NONE,
+  BITFIELD(6, 2) /* index 813 */,
+  TILE_OPC_ADDLI_SN, TILE_OPC_ADDLI_SN, TILE_OPC_ADDLI_SN, CHILD(818),
+  BITFIELD(8, 2) /* index 818 */,
+  TILE_OPC_ADDLI_SN, TILE_OPC_ADDLI_SN, TILE_OPC_ADDLI_SN, CHILD(823),
+  BITFIELD(10, 2) /* index 823 */,
+  TILE_OPC_ADDLI_SN, TILE_OPC_ADDLI_SN, TILE_OPC_ADDLI_SN, TILE_OPC_MOVELI_SN,
+  BITFIELD(6, 2) /* index 828 */,
+  TILE_OPC_ADDLI, TILE_OPC_ADDLI, TILE_OPC_ADDLI, CHILD(833),
+  BITFIELD(8, 2) /* index 833 */,
+  TILE_OPC_ADDLI, TILE_OPC_ADDLI, TILE_OPC_ADDLI, CHILD(838),
+  BITFIELD(10, 2) /* index 838 */,
+  TILE_OPC_ADDLI, TILE_OPC_ADDLI, TILE_OPC_ADDLI, TILE_OPC_MOVELI,
+  BITFIELD(0, 2) /* index 843 */,
+  TILE_OPC_AULI, TILE_OPC_AULI, TILE_OPC_AULI, CHILD(848),
+  BITFIELD(2, 2) /* index 848 */,
+  TILE_OPC_AULI, TILE_OPC_AULI, TILE_OPC_AULI, CHILD(853),
+  BITFIELD(4, 2) /* index 853 */,
+  TILE_OPC_AULI, TILE_OPC_AULI, TILE_OPC_AULI, CHILD(858),
+  BITFIELD(6, 2) /* index 858 */,
+  TILE_OPC_AULI, TILE_OPC_AULI, TILE_OPC_AULI, CHILD(863),
+  BITFIELD(8, 2) /* index 863 */,
+  TILE_OPC_AULI, TILE_OPC_AULI, TILE_OPC_AULI, CHILD(868),
+  BITFIELD(10, 2) /* index 868 */,
+  TILE_OPC_AULI, TILE_OPC_AULI, TILE_OPC_AULI, TILE_OPC_INFOL,
+  BITFIELD(20, 2) /* index 873 */,
+  TILE_OPC_NONE, TILE_OPC_ADDIB, TILE_OPC_ADDIH, TILE_OPC_ADDI,
+  BITFIELD(20, 2) /* index 878 */,
+  TILE_OPC_MAXIB_U, TILE_OPC_MAXIH, TILE_OPC_MINIB_U, TILE_OPC_MINIH,
+  BITFIELD(20, 2) /* index 883 */,
+  CHILD(888), TILE_OPC_SEQIB, TILE_OPC_SEQIH, TILE_OPC_SEQI,
+  BITFIELD(6, 2) /* index 888 */,
+  TILE_OPC_ORI, TILE_OPC_ORI, TILE_OPC_ORI, CHILD(893),
+  BITFIELD(8, 2) /* index 893 */,
+  TILE_OPC_ORI, TILE_OPC_ORI, TILE_OPC_ORI, CHILD(898),
+  BITFIELD(10, 2) /* index 898 */,
+  TILE_OPC_ORI, TILE_OPC_ORI, TILE_OPC_ORI, TILE_OPC_MOVEI,
+  BITFIELD(20, 2) /* index 903 */,
+  TILE_OPC_SLTIB, TILE_OPC_SLTIB_U, TILE_OPC_SLTIH, TILE_OPC_SLTIH_U,
+  BITFIELD(20, 2) /* index 908 */,
+  TILE_OPC_SLTI, TILE_OPC_SLTI_U, TILE_OPC_NONE, TILE_OPC_NONE,
+  BITFIELD(20, 2) /* index 913 */,
+  TILE_OPC_NONE, TILE_OPC_ADDIB_SN, TILE_OPC_ADDIH_SN, TILE_OPC_ADDI_SN,
+  BITFIELD(20, 2) /* index 918 */,
+  TILE_OPC_MAXIB_U_SN, TILE_OPC_MAXIH_SN, TILE_OPC_MINIB_U_SN,
+  TILE_OPC_MINIH_SN,
+  BITFIELD(20, 2) /* index 923 */,
+  CHILD(928), TILE_OPC_SEQIB_SN, TILE_OPC_SEQIH_SN, TILE_OPC_SEQI_SN,
+  BITFIELD(6, 2) /* index 928 */,
+  TILE_OPC_ORI_SN, TILE_OPC_ORI_SN, TILE_OPC_ORI_SN, CHILD(933),
+  BITFIELD(8, 2) /* index 933 */,
+  TILE_OPC_ORI_SN, TILE_OPC_ORI_SN, TILE_OPC_ORI_SN, CHILD(938),
+  BITFIELD(10, 2) /* index 938 */,
+  TILE_OPC_ORI_SN, TILE_OPC_ORI_SN, TILE_OPC_ORI_SN, TILE_OPC_MOVEI_SN,
+  BITFIELD(20, 2) /* index 943 */,
+  TILE_OPC_SLTIB_SN, TILE_OPC_SLTIB_U_SN, TILE_OPC_SLTIH_SN,
+  TILE_OPC_SLTIH_U_SN,
+  BITFIELD(20, 2) /* index 948 */,
+  TILE_OPC_SLTI_SN, TILE_OPC_SLTI_U_SN, TILE_OPC_NONE, TILE_OPC_NONE,
+  BITFIELD(20, 2) /* index 953 */,
+  TILE_OPC_NONE, CHILD(958), TILE_OPC_XORI, TILE_OPC_NONE,
+  BITFIELD(0, 2) /* index 958 */,
+  TILE_OPC_ANDI, TILE_OPC_ANDI, TILE_OPC_ANDI, CHILD(963),
+  BITFIELD(2, 2) /* index 963 */,
+  TILE_OPC_ANDI, TILE_OPC_ANDI, TILE_OPC_ANDI, CHILD(968),
+  BITFIELD(4, 2) /* index 968 */,
+  TILE_OPC_ANDI, TILE_OPC_ANDI, TILE_OPC_ANDI, CHILD(973),
+  BITFIELD(6, 2) /* index 973 */,
+  TILE_OPC_ANDI, TILE_OPC_ANDI, TILE_OPC_ANDI, CHILD(978),
+  BITFIELD(8, 2) /* index 978 */,
+  TILE_OPC_ANDI, TILE_OPC_ANDI, TILE_OPC_ANDI, CHILD(983),
+  BITFIELD(10, 2) /* index 983 */,
+  TILE_OPC_ANDI, TILE_OPC_ANDI, TILE_OPC_ANDI, TILE_OPC_INFO,
+  BITFIELD(20, 2) /* index 988 */,
+  TILE_OPC_NONE, TILE_OPC_ANDI_SN, TILE_OPC_XORI_SN, TILE_OPC_NONE,
+  BITFIELD(17, 5) /* index 993 */,
+  TILE_OPC_NONE, TILE_OPC_RLI, TILE_OPC_SHLIB, TILE_OPC_SHLIH, TILE_OPC_SHLI,
+  TILE_OPC_SHRIB, TILE_OPC_SHRIH, TILE_OPC_SHRI, TILE_OPC_SRAIB,
+  TILE_OPC_SRAIH, TILE_OPC_SRAI, CHILD(1026), TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  BITFIELD(12, 4) /* index 1026 */,
+  TILE_OPC_NONE, CHILD(1043), CHILD(1046), CHILD(1049), CHILD(1052),
+  CHILD(1055), CHILD(1058), CHILD(1061), CHILD(1064), CHILD(1067),
+  CHILD(1070), CHILD(1073), TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE,
+  BITFIELD(16, 1) /* index 1043 */,
+  TILE_OPC_BITX, TILE_OPC_NONE,
+  BITFIELD(16, 1) /* index 1046 */,
+  TILE_OPC_BYTEX, TILE_OPC_NONE,
+  BITFIELD(16, 1) /* index 1049 */,
+  TILE_OPC_CLZ, TILE_OPC_NONE,
+  BITFIELD(16, 1) /* index 1052 */,
+  TILE_OPC_CTZ, TILE_OPC_NONE,
+  BITFIELD(16, 1) /* index 1055 */,
+  TILE_OPC_FNOP, TILE_OPC_NONE,
+  BITFIELD(16, 1) /* index 1058 */,
+  TILE_OPC_NOP, TILE_OPC_NONE,
+  BITFIELD(16, 1) /* index 1061 */,
+  TILE_OPC_PCNT, TILE_OPC_NONE,
+  BITFIELD(16, 1) /* index 1064 */,
+  TILE_OPC_TBLIDXB0, TILE_OPC_NONE,
+  BITFIELD(16, 1) /* index 1067 */,
+  TILE_OPC_TBLIDXB1, TILE_OPC_NONE,
+  BITFIELD(16, 1) /* index 1070 */,
+  TILE_OPC_TBLIDXB2, TILE_OPC_NONE,
+  BITFIELD(16, 1) /* index 1073 */,
+  TILE_OPC_TBLIDXB3, TILE_OPC_NONE,
+  BITFIELD(17, 5) /* index 1076 */,
+  TILE_OPC_NONE, TILE_OPC_RLI_SN, TILE_OPC_SHLIB_SN, TILE_OPC_SHLIH_SN,
+  TILE_OPC_SHLI_SN, TILE_OPC_SHRIB_SN, TILE_OPC_SHRIH_SN, TILE_OPC_SHRI_SN,
+  TILE_OPC_SRAIB_SN, TILE_OPC_SRAIH_SN, TILE_OPC_SRAI_SN, CHILD(1109),
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  BITFIELD(12, 4) /* index 1109 */,
+  TILE_OPC_NONE, CHILD(1126), CHILD(1129), CHILD(1132), CHILD(1135),
+  CHILD(1055), CHILD(1058), CHILD(1138), CHILD(1141), CHILD(1144),
+  CHILD(1147), CHILD(1150), TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE,
+  BITFIELD(16, 1) /* index 1126 */,
+  TILE_OPC_BITX_SN, TILE_OPC_NONE,
+  BITFIELD(16, 1) /* index 1129 */,
+  TILE_OPC_BYTEX_SN, TILE_OPC_NONE,
+  BITFIELD(16, 1) /* index 1132 */,
+  TILE_OPC_CLZ_SN, TILE_OPC_NONE,
+  BITFIELD(16, 1) /* index 1135 */,
+  TILE_OPC_CTZ_SN, TILE_OPC_NONE,
+  BITFIELD(16, 1) /* index 1138 */,
+  TILE_OPC_PCNT_SN, TILE_OPC_NONE,
+  BITFIELD(16, 1) /* index 1141 */,
+  TILE_OPC_TBLIDXB0_SN, TILE_OPC_NONE,
+  BITFIELD(16, 1) /* index 1144 */,
+  TILE_OPC_TBLIDXB1_SN, TILE_OPC_NONE,
+  BITFIELD(16, 1) /* index 1147 */,
+  TILE_OPC_TBLIDXB2_SN, TILE_OPC_NONE,
+  BITFIELD(16, 1) /* index 1150 */,
+  TILE_OPC_TBLIDXB3_SN, TILE_OPC_NONE,
+};
+
+static const unsigned short decode_X1_fsm[1509] =
+{
+  BITFIELD(54, 9) /* index 0 */,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, CHILD(513), CHILD(561), CHILD(594),
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, CHILD(641), CHILD(689),
+  CHILD(722), TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, CHILD(766),
+  CHILD(766), CHILD(766), CHILD(766), CHILD(766), CHILD(766), CHILD(766),
+  CHILD(766), CHILD(766), CHILD(766), CHILD(766), CHILD(766), CHILD(766),
+  CHILD(766), CHILD(766), CHILD(766), CHILD(766), CHILD(766), CHILD(766),
+  CHILD(766), CHILD(766), CHILD(766), CHILD(766), CHILD(766), CHILD(766),
+  CHILD(766), CHILD(766), CHILD(766), CHILD(766), CHILD(766), CHILD(766),
+  CHILD(766), CHILD(781), CHILD(781), CHILD(781), CHILD(781), CHILD(781),
+  CHILD(781), CHILD(781), CHILD(781), CHILD(781), CHILD(781), CHILD(781),
+  CHILD(781), CHILD(781), CHILD(781), CHILD(781), CHILD(781), CHILD(781),
+  CHILD(781), CHILD(781), CHILD(781), CHILD(781), CHILD(781), CHILD(781),
+  CHILD(781), CHILD(781), CHILD(781), CHILD(781), CHILD(781), CHILD(781),
+  CHILD(781), CHILD(781), CHILD(781), CHILD(796), CHILD(796), CHILD(796),
+  CHILD(796), CHILD(796), CHILD(796), CHILD(796), CHILD(796), CHILD(796),
+  CHILD(796), CHILD(796), CHILD(796), CHILD(796), CHILD(796), CHILD(796),
+  CHILD(796), CHILD(796), CHILD(796), CHILD(796), CHILD(796), CHILD(796),
+  CHILD(796), CHILD(796), CHILD(796), CHILD(796), CHILD(796), CHILD(796),
+  CHILD(796), CHILD(796), CHILD(796), CHILD(796), CHILD(796), CHILD(826),
+  CHILD(826), CHILD(826), CHILD(826), CHILD(826), CHILD(826), CHILD(826),
+  CHILD(826), CHILD(826), CHILD(826), CHILD(826), CHILD(826), CHILD(826),
+  CHILD(826), CHILD(826), CHILD(826), CHILD(843), CHILD(843), CHILD(843),
+  CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843),
+  CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843),
+  CHILD(843), CHILD(860), CHILD(899), CHILD(923), CHILD(932), TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, CHILD(941), CHILD(950), CHILD(974), CHILD(983),
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM,
+  TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM,
+  TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM,
+  TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM,
+  TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM,
+  TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM,
+  TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, CHILD(992),
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  CHILD(1303), TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_J, TILE_OPC_J,
+  TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, TILE_OPC_J,
+  TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, TILE_OPC_J,
+  TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, TILE_OPC_J,
+  TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, TILE_OPC_J,
+  TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, TILE_OPC_J,
+  TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, TILE_OPC_J,
+  TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, TILE_OPC_J,
+  TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, TILE_OPC_J,
+  TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, TILE_OPC_J,
+  TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, TILE_OPC_J,
+  TILE_OPC_J, TILE_OPC_J, TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL,
+  TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL,
+  TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL,
+  TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL,
+  TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL,
+  TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL,
+  TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL,
+  TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL,
+  TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL,
+  TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL,
+  TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL,
+  TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL,
+  TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL,
+  TILE_OPC_JAL, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  BITFIELD(49, 5) /* index 513 */,
+  TILE_OPC_NONE, TILE_OPC_ADDB, TILE_OPC_ADDH, TILE_OPC_ADD, TILE_OPC_AND,
+  TILE_OPC_INTHB, TILE_OPC_INTHH, TILE_OPC_INTLB, TILE_OPC_INTLH,
+  TILE_OPC_JALRP, TILE_OPC_JALR, TILE_OPC_JRP, TILE_OPC_JR, TILE_OPC_LNK,
+  TILE_OPC_MAXB_U, TILE_OPC_MAXH, TILE_OPC_MINB_U, TILE_OPC_MINH,
+  TILE_OPC_MNZB, TILE_OPC_MNZH, TILE_OPC_MNZ, TILE_OPC_MZB, TILE_OPC_MZH,
+  TILE_OPC_MZ, TILE_OPC_NOR, CHILD(546), TILE_OPC_PACKHB, TILE_OPC_PACKLB,
+  TILE_OPC_RL, TILE_OPC_S1A, TILE_OPC_S2A, TILE_OPC_S3A,
+  BITFIELD(43, 2) /* index 546 */,
+  TILE_OPC_OR, TILE_OPC_OR, TILE_OPC_OR, CHILD(551),
+  BITFIELD(45, 2) /* index 551 */,
+  TILE_OPC_OR, TILE_OPC_OR, TILE_OPC_OR, CHILD(556),
+  BITFIELD(47, 2) /* index 556 */,
+  TILE_OPC_OR, TILE_OPC_OR, TILE_OPC_OR, TILE_OPC_MOVE,
+  BITFIELD(49, 5) /* index 561 */,
+  TILE_OPC_SB, TILE_OPC_SEQB, TILE_OPC_SEQH, TILE_OPC_SEQ, TILE_OPC_SHLB,
+  TILE_OPC_SHLH, TILE_OPC_SHL, TILE_OPC_SHRB, TILE_OPC_SHRH, TILE_OPC_SHR,
+  TILE_OPC_SH, TILE_OPC_SLTB, TILE_OPC_SLTB_U, TILE_OPC_SLTEB,
+  TILE_OPC_SLTEB_U, TILE_OPC_SLTEH, TILE_OPC_SLTEH_U, TILE_OPC_SLTE,
+  TILE_OPC_SLTE_U, TILE_OPC_SLTH, TILE_OPC_SLTH_U, TILE_OPC_SLT,
+  TILE_OPC_SLT_U, TILE_OPC_SNEB, TILE_OPC_SNEH, TILE_OPC_SNE, TILE_OPC_SRAB,
+  TILE_OPC_SRAH, TILE_OPC_SRA, TILE_OPC_SUBB, TILE_OPC_SUBH, TILE_OPC_SUB,
+  BITFIELD(49, 4) /* index 594 */,
+  CHILD(611), CHILD(614), CHILD(617), CHILD(620), CHILD(623), CHILD(626),
+  CHILD(629), CHILD(632), CHILD(635), CHILD(638), TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  BITFIELD(53, 1) /* index 611 */,
+  TILE_OPC_SW, TILE_OPC_NONE,
+  BITFIELD(53, 1) /* index 614 */,
+  TILE_OPC_XOR, TILE_OPC_NONE,
+  BITFIELD(53, 1) /* index 617 */,
+  TILE_OPC_ADDS, TILE_OPC_NONE,
+  BITFIELD(53, 1) /* index 620 */,
+  TILE_OPC_SUBS, TILE_OPC_NONE,
+  BITFIELD(53, 1) /* index 623 */,
+  TILE_OPC_ADDBS_U, TILE_OPC_NONE,
+  BITFIELD(53, 1) /* index 626 */,
+  TILE_OPC_ADDHS, TILE_OPC_NONE,
+  BITFIELD(53, 1) /* index 629 */,
+  TILE_OPC_SUBBS_U, TILE_OPC_NONE,
+  BITFIELD(53, 1) /* index 632 */,
+  TILE_OPC_SUBHS, TILE_OPC_NONE,
+  BITFIELD(53, 1) /* index 635 */,
+  TILE_OPC_PACKHS, TILE_OPC_NONE,
+  BITFIELD(53, 1) /* index 638 */,
+  TILE_OPC_PACKBS_U, TILE_OPC_NONE,
+  BITFIELD(49, 5) /* index 641 */,
+  TILE_OPC_NONE, TILE_OPC_ADDB_SN, TILE_OPC_ADDH_SN, TILE_OPC_ADD_SN,
+  TILE_OPC_AND_SN, TILE_OPC_INTHB_SN, TILE_OPC_INTHH_SN, TILE_OPC_INTLB_SN,
+  TILE_OPC_INTLH_SN, TILE_OPC_JALRP, TILE_OPC_JALR, TILE_OPC_JRP, TILE_OPC_JR,
+  TILE_OPC_LNK_SN, TILE_OPC_MAXB_U_SN, TILE_OPC_MAXH_SN, TILE_OPC_MINB_U_SN,
+  TILE_OPC_MINH_SN, TILE_OPC_MNZB_SN, TILE_OPC_MNZH_SN, TILE_OPC_MNZ_SN,
+  TILE_OPC_MZB_SN, TILE_OPC_MZH_SN, TILE_OPC_MZ_SN, TILE_OPC_NOR_SN,
+  CHILD(674), TILE_OPC_PACKHB_SN, TILE_OPC_PACKLB_SN, TILE_OPC_RL_SN,
+  TILE_OPC_S1A_SN, TILE_OPC_S2A_SN, TILE_OPC_S3A_SN,
+  BITFIELD(43, 2) /* index 674 */,
+  TILE_OPC_OR_SN, TILE_OPC_OR_SN, TILE_OPC_OR_SN, CHILD(679),
+  BITFIELD(45, 2) /* index 679 */,
+  TILE_OPC_OR_SN, TILE_OPC_OR_SN, TILE_OPC_OR_SN, CHILD(684),
+  BITFIELD(47, 2) /* index 684 */,
+  TILE_OPC_OR_SN, TILE_OPC_OR_SN, TILE_OPC_OR_SN, TILE_OPC_MOVE_SN,
+  BITFIELD(49, 5) /* index 689 */,
+  TILE_OPC_SB, TILE_OPC_SEQB_SN, TILE_OPC_SEQH_SN, TILE_OPC_SEQ_SN,
+  TILE_OPC_SHLB_SN, TILE_OPC_SHLH_SN, TILE_OPC_SHL_SN, TILE_OPC_SHRB_SN,
+  TILE_OPC_SHRH_SN, TILE_OPC_SHR_SN, TILE_OPC_SH, TILE_OPC_SLTB_SN,
+  TILE_OPC_SLTB_U_SN, TILE_OPC_SLTEB_SN, TILE_OPC_SLTEB_U_SN,
+  TILE_OPC_SLTEH_SN, TILE_OPC_SLTEH_U_SN, TILE_OPC_SLTE_SN,
+  TILE_OPC_SLTE_U_SN, TILE_OPC_SLTH_SN, TILE_OPC_SLTH_U_SN, TILE_OPC_SLT_SN,
+  TILE_OPC_SLT_U_SN, TILE_OPC_SNEB_SN, TILE_OPC_SNEH_SN, TILE_OPC_SNE_SN,
+  TILE_OPC_SRAB_SN, TILE_OPC_SRAH_SN, TILE_OPC_SRA_SN, TILE_OPC_SUBB_SN,
+  TILE_OPC_SUBH_SN, TILE_OPC_SUB_SN,
+  BITFIELD(49, 4) /* index 722 */,
+  CHILD(611), CHILD(739), CHILD(742), CHILD(745), CHILD(748), CHILD(751),
+  CHILD(754), CHILD(757), CHILD(760), CHILD(763), TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  BITFIELD(53, 1) /* index 739 */,
+  TILE_OPC_XOR_SN, TILE_OPC_NONE,
+  BITFIELD(53, 1) /* index 742 */,
+  TILE_OPC_ADDS_SN, TILE_OPC_NONE,
+  BITFIELD(53, 1) /* index 745 */,
+  TILE_OPC_SUBS_SN, TILE_OPC_NONE,
+  BITFIELD(53, 1) /* index 748 */,
+  TILE_OPC_ADDBS_U_SN, TILE_OPC_NONE,
+  BITFIELD(53, 1) /* index 751 */,
+  TILE_OPC_ADDHS_SN, TILE_OPC_NONE,
+  BITFIELD(53, 1) /* index 754 */,
+  TILE_OPC_SUBBS_U_SN, TILE_OPC_NONE,
+  BITFIELD(53, 1) /* index 757 */,
+  TILE_OPC_SUBHS_SN, TILE_OPC_NONE,
+  BITFIELD(53, 1) /* index 760 */,
+  TILE_OPC_PACKHS_SN, TILE_OPC_NONE,
+  BITFIELD(53, 1) /* index 763 */,
+  TILE_OPC_PACKBS_U_SN, TILE_OPC_NONE,
+  BITFIELD(37, 2) /* index 766 */,
+  TILE_OPC_ADDLI_SN, TILE_OPC_ADDLI_SN, TILE_OPC_ADDLI_SN, CHILD(771),
+  BITFIELD(39, 2) /* index 771 */,
+  TILE_OPC_ADDLI_SN, TILE_OPC_ADDLI_SN, TILE_OPC_ADDLI_SN, CHILD(776),
+  BITFIELD(41, 2) /* index 776 */,
+  TILE_OPC_ADDLI_SN, TILE_OPC_ADDLI_SN, TILE_OPC_ADDLI_SN, TILE_OPC_MOVELI_SN,
+  BITFIELD(37, 2) /* index 781 */,
+  TILE_OPC_ADDLI, TILE_OPC_ADDLI, TILE_OPC_ADDLI, CHILD(786),
+  BITFIELD(39, 2) /* index 786 */,
+  TILE_OPC_ADDLI, TILE_OPC_ADDLI, TILE_OPC_ADDLI, CHILD(791),
+  BITFIELD(41, 2) /* index 791 */,
+  TILE_OPC_ADDLI, TILE_OPC_ADDLI, TILE_OPC_ADDLI, TILE_OPC_MOVELI,
+  BITFIELD(31, 2) /* index 796 */,
+  TILE_OPC_AULI, TILE_OPC_AULI, TILE_OPC_AULI, CHILD(801),
+  BITFIELD(33, 2) /* index 801 */,
+  TILE_OPC_AULI, TILE_OPC_AULI, TILE_OPC_AULI, CHILD(806),
+  BITFIELD(35, 2) /* index 806 */,
+  TILE_OPC_AULI, TILE_OPC_AULI, TILE_OPC_AULI, CHILD(811),
+  BITFIELD(37, 2) /* index 811 */,
+  TILE_OPC_AULI, TILE_OPC_AULI, TILE_OPC_AULI, CHILD(816),
+  BITFIELD(39, 2) /* index 816 */,
+  TILE_OPC_AULI, TILE_OPC_AULI, TILE_OPC_AULI, CHILD(821),
+  BITFIELD(41, 2) /* index 821 */,
+  TILE_OPC_AULI, TILE_OPC_AULI, TILE_OPC_AULI, TILE_OPC_INFOL,
+  BITFIELD(31, 4) /* index 826 */,
+  TILE_OPC_BZ, TILE_OPC_BZT, TILE_OPC_BNZ, TILE_OPC_BNZT, TILE_OPC_BGZ,
+  TILE_OPC_BGZT, TILE_OPC_BGEZ, TILE_OPC_BGEZT, TILE_OPC_BLZ, TILE_OPC_BLZT,
+  TILE_OPC_BLEZ, TILE_OPC_BLEZT, TILE_OPC_BBS, TILE_OPC_BBST, TILE_OPC_BBNS,
+  TILE_OPC_BBNST,
+  BITFIELD(31, 4) /* index 843 */,
+  TILE_OPC_BZ_SN, TILE_OPC_BZT_SN, TILE_OPC_BNZ_SN, TILE_OPC_BNZT_SN,
+  TILE_OPC_BGZ_SN, TILE_OPC_BGZT_SN, TILE_OPC_BGEZ_SN, TILE_OPC_BGEZT_SN,
+  TILE_OPC_BLZ_SN, TILE_OPC_BLZT_SN, TILE_OPC_BLEZ_SN, TILE_OPC_BLEZT_SN,
+  TILE_OPC_BBS_SN, TILE_OPC_BBST_SN, TILE_OPC_BBNS_SN, TILE_OPC_BBNST_SN,
+  BITFIELD(51, 3) /* index 860 */,
+  TILE_OPC_NONE, TILE_OPC_ADDIB, TILE_OPC_ADDIH, TILE_OPC_ADDI, CHILD(869),
+  TILE_OPC_MAXIB_U, TILE_OPC_MAXIH, TILE_OPC_MFSPR,
+  BITFIELD(31, 2) /* index 869 */,
+  TILE_OPC_ANDI, TILE_OPC_ANDI, TILE_OPC_ANDI, CHILD(874),
+  BITFIELD(33, 2) /* index 874 */,
+  TILE_OPC_ANDI, TILE_OPC_ANDI, TILE_OPC_ANDI, CHILD(879),
+  BITFIELD(35, 2) /* index 879 */,
+  TILE_OPC_ANDI, TILE_OPC_ANDI, TILE_OPC_ANDI, CHILD(884),
+  BITFIELD(37, 2) /* index 884 */,
+  TILE_OPC_ANDI, TILE_OPC_ANDI, TILE_OPC_ANDI, CHILD(889),
+  BITFIELD(39, 2) /* index 889 */,
+  TILE_OPC_ANDI, TILE_OPC_ANDI, TILE_OPC_ANDI, CHILD(894),
+  BITFIELD(41, 2) /* index 894 */,
+  TILE_OPC_ANDI, TILE_OPC_ANDI, TILE_OPC_ANDI, TILE_OPC_INFO,
+  BITFIELD(51, 3) /* index 899 */,
+  TILE_OPC_MINIB_U, TILE_OPC_MINIH, TILE_OPC_MTSPR, CHILD(908),
+  TILE_OPC_SEQIB, TILE_OPC_SEQIH, TILE_OPC_SEQI, TILE_OPC_SLTIB,
+  BITFIELD(37, 2) /* index 908 */,
+  TILE_OPC_ORI, TILE_OPC_ORI, TILE_OPC_ORI, CHILD(913),
+  BITFIELD(39, 2) /* index 913 */,
+  TILE_OPC_ORI, TILE_OPC_ORI, TILE_OPC_ORI, CHILD(918),
+  BITFIELD(41, 2) /* index 918 */,
+  TILE_OPC_ORI, TILE_OPC_ORI, TILE_OPC_ORI, TILE_OPC_MOVEI,
+  BITFIELD(51, 3) /* index 923 */,
+  TILE_OPC_SLTIB_U, TILE_OPC_SLTIH, TILE_OPC_SLTIH_U, TILE_OPC_SLTI,
+  TILE_OPC_SLTI_U, TILE_OPC_XORI, TILE_OPC_LBADD, TILE_OPC_LBADD_U,
+  BITFIELD(51, 3) /* index 932 */,
+  TILE_OPC_LHADD, TILE_OPC_LHADD_U, TILE_OPC_LWADD, TILE_OPC_LWADD_NA,
+  TILE_OPC_SBADD, TILE_OPC_SHADD, TILE_OPC_SWADD, TILE_OPC_NONE,
+  BITFIELD(51, 3) /* index 941 */,
+  TILE_OPC_NONE, TILE_OPC_ADDIB_SN, TILE_OPC_ADDIH_SN, TILE_OPC_ADDI_SN,
+  TILE_OPC_ANDI_SN, TILE_OPC_MAXIB_U_SN, TILE_OPC_MAXIH_SN, TILE_OPC_MFSPR,
+  BITFIELD(51, 3) /* index 950 */,
+  TILE_OPC_MINIB_U_SN, TILE_OPC_MINIH_SN, TILE_OPC_MTSPR, CHILD(959),
+  TILE_OPC_SEQIB_SN, TILE_OPC_SEQIH_SN, TILE_OPC_SEQI_SN, TILE_OPC_SLTIB_SN,
+  BITFIELD(37, 2) /* index 959 */,
+  TILE_OPC_ORI_SN, TILE_OPC_ORI_SN, TILE_OPC_ORI_SN, CHILD(964),
+  BITFIELD(39, 2) /* index 964 */,
+  TILE_OPC_ORI_SN, TILE_OPC_ORI_SN, TILE_OPC_ORI_SN, CHILD(969),
+  BITFIELD(41, 2) /* index 969 */,
+  TILE_OPC_ORI_SN, TILE_OPC_ORI_SN, TILE_OPC_ORI_SN, TILE_OPC_MOVEI_SN,
+  BITFIELD(51, 3) /* index 974 */,
+  TILE_OPC_SLTIB_U_SN, TILE_OPC_SLTIH_SN, TILE_OPC_SLTIH_U_SN,
+  TILE_OPC_SLTI_SN, TILE_OPC_SLTI_U_SN, TILE_OPC_XORI_SN, TILE_OPC_LBADD_SN,
+  TILE_OPC_LBADD_U_SN,
+  BITFIELD(51, 3) /* index 983 */,
+  TILE_OPC_LHADD_SN, TILE_OPC_LHADD_U_SN, TILE_OPC_LWADD_SN,
+  TILE_OPC_LWADD_NA_SN, TILE_OPC_SBADD, TILE_OPC_SHADD, TILE_OPC_SWADD,
+  TILE_OPC_NONE,
+  BITFIELD(46, 7) /* index 992 */,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, CHILD(1121),
+  CHILD(1121), CHILD(1121), CHILD(1121), CHILD(1124), CHILD(1124),
+  CHILD(1124), CHILD(1124), CHILD(1127), CHILD(1127), CHILD(1127),
+  CHILD(1127), CHILD(1130), CHILD(1130), CHILD(1130), CHILD(1130),
+  CHILD(1133), CHILD(1133), CHILD(1133), CHILD(1133), CHILD(1136),
+  CHILD(1136), CHILD(1136), CHILD(1136), CHILD(1139), CHILD(1139),
+  CHILD(1139), CHILD(1139), CHILD(1142), CHILD(1142), CHILD(1142),
+  CHILD(1142), CHILD(1145), CHILD(1145), CHILD(1145), CHILD(1145),
+  CHILD(1148), CHILD(1148), CHILD(1148), CHILD(1148), CHILD(1151),
+  CHILD(1211), CHILD(1259), CHILD(1292), TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  BITFIELD(53, 1) /* index 1121 */,
+  TILE_OPC_RLI, TILE_OPC_NONE,
+  BITFIELD(53, 1) /* index 1124 */,
+  TILE_OPC_SHLIB, TILE_OPC_NONE,
+  BITFIELD(53, 1) /* index 1127 */,
+  TILE_OPC_SHLIH, TILE_OPC_NONE,
+  BITFIELD(53, 1) /* index 1130 */,
+  TILE_OPC_SHLI, TILE_OPC_NONE,
+  BITFIELD(53, 1) /* index 1133 */,
+  TILE_OPC_SHRIB, TILE_OPC_NONE,
+  BITFIELD(53, 1) /* index 1136 */,
+  TILE_OPC_SHRIH, TILE_OPC_NONE,
+  BITFIELD(53, 1) /* index 1139 */,
+  TILE_OPC_SHRI, TILE_OPC_NONE,
+  BITFIELD(53, 1) /* index 1142 */,
+  TILE_OPC_SRAIB, TILE_OPC_NONE,
+  BITFIELD(53, 1) /* index 1145 */,
+  TILE_OPC_SRAIH, TILE_OPC_NONE,
+  BITFIELD(53, 1) /* index 1148 */,
+  TILE_OPC_SRAI, TILE_OPC_NONE,
+  BITFIELD(43, 3) /* index 1151 */,
+  TILE_OPC_NONE, CHILD(1160), CHILD(1163), CHILD(1166), CHILD(1169),
+  CHILD(1172), CHILD(1175), CHILD(1178),
+  BITFIELD(53, 1) /* index 1160 */,
+  TILE_OPC_DRAIN, TILE_OPC_NONE,
+  BITFIELD(53, 1) /* index 1163 */,
+  TILE_OPC_DTLBPR, TILE_OPC_NONE,
+  BITFIELD(53, 1) /* index 1166 */,
+  TILE_OPC_FINV, TILE_OPC_NONE,
+  BITFIELD(53, 1) /* index 1169 */,
+  TILE_OPC_FLUSH, TILE_OPC_NONE,
+  BITFIELD(53, 1) /* index 1172 */,
+  TILE_OPC_FNOP, TILE_OPC_NONE,
+  BITFIELD(53, 1) /* index 1175 */,
+  TILE_OPC_ICOH, TILE_OPC_NONE,
+  BITFIELD(53, 1) /* index 1178 */,
+  CHILD(1181), TILE_OPC_NONE,
+  BITFIELD(31, 2) /* index 1181 */,
+  CHILD(1186), TILE_OPC_ILL, TILE_OPC_ILL, TILE_OPC_ILL,
+  BITFIELD(33, 2) /* index 1186 */,
+  TILE_OPC_ILL, TILE_OPC_ILL, TILE_OPC_ILL, CHILD(1191),
+  BITFIELD(35, 2) /* index 1191 */,
+  TILE_OPC_ILL, CHILD(1196), TILE_OPC_ILL, TILE_OPC_ILL,
+  BITFIELD(37, 2) /* index 1196 */,
+  TILE_OPC_ILL, CHILD(1201), TILE_OPC_ILL, TILE_OPC_ILL,
+  BITFIELD(39, 2) /* index 1201 */,
+  TILE_OPC_ILL, CHILD(1206), TILE_OPC_ILL, TILE_OPC_ILL,
+  BITFIELD(41, 2) /* index 1206 */,
+  TILE_OPC_ILL, TILE_OPC_ILL, TILE_OPC_BPT, TILE_OPC_ILL,
+  BITFIELD(43, 3) /* index 1211 */,
+  CHILD(1220), CHILD(1223), CHILD(1226), CHILD(1244), CHILD(1247),
+  CHILD(1250), CHILD(1253), CHILD(1256),
+  BITFIELD(53, 1) /* index 1220 */,
+  TILE_OPC_INV, TILE_OPC_NONE,
+  BITFIELD(53, 1) /* index 1223 */,
+  TILE_OPC_IRET, TILE_OPC_NONE,
+  BITFIELD(53, 1) /* index 1226 */,
+  CHILD(1229), TILE_OPC_NONE,
+  BITFIELD(31, 2) /* index 1229 */,
+  TILE_OPC_LB, TILE_OPC_LB, TILE_OPC_LB, CHILD(1234),
+  BITFIELD(33, 2) /* index 1234 */,
+  TILE_OPC_LB, TILE_OPC_LB, TILE_OPC_LB, CHILD(1239),
+  BITFIELD(35, 2) /* index 1239 */,
+  TILE_OPC_LB, TILE_OPC_LB, TILE_OPC_LB, TILE_OPC_PREFETCH,
+  BITFIELD(53, 1) /* index 1244 */,
+  TILE_OPC_LB_U, TILE_OPC_NONE,
+  BITFIELD(53, 1) /* index 1247 */,
+  TILE_OPC_LH, TILE_OPC_NONE,
+  BITFIELD(53, 1) /* index 1250 */,
+  TILE_OPC_LH_U, TILE_OPC_NONE,
+  BITFIELD(53, 1) /* index 1253 */,
+  TILE_OPC_LW, TILE_OPC_NONE,
+  BITFIELD(53, 1) /* index 1256 */,
+  TILE_OPC_MF, TILE_OPC_NONE,
+  BITFIELD(43, 3) /* index 1259 */,
+  CHILD(1268), CHILD(1271), CHILD(1274), CHILD(1277), CHILD(1280),
+  CHILD(1283), CHILD(1286), CHILD(1289),
+  BITFIELD(53, 1) /* index 1268 */,
+  TILE_OPC_NAP, TILE_OPC_NONE,
+  BITFIELD(53, 1) /* index 1271 */,
+  TILE_OPC_NOP, TILE_OPC_NONE,
+  BITFIELD(53, 1) /* index 1274 */,
+  TILE_OPC_SWINT0, TILE_OPC_NONE,
+  BITFIELD(53, 1) /* index 1277 */,
+  TILE_OPC_SWINT1, TILE_OPC_NONE,
+  BITFIELD(53, 1) /* index 1280 */,
+  TILE_OPC_SWINT2, TILE_OPC_NONE,
+  BITFIELD(53, 1) /* index 1283 */,
+  TILE_OPC_SWINT3, TILE_OPC_NONE,
+  BITFIELD(53, 1) /* index 1286 */,
+  TILE_OPC_TNS, TILE_OPC_NONE,
+  BITFIELD(53, 1) /* index 1289 */,
+  TILE_OPC_WH64, TILE_OPC_NONE,
+  BITFIELD(43, 2) /* index 1292 */,
+  CHILD(1297), TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  BITFIELD(45, 1) /* index 1297 */,
+  CHILD(1300), TILE_OPC_NONE,
+  BITFIELD(53, 1) /* index 1300 */,
+  TILE_OPC_LW_NA, TILE_OPC_NONE,
+  BITFIELD(46, 7) /* index 1303 */,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, CHILD(1432),
+  CHILD(1432), CHILD(1432), CHILD(1432), CHILD(1435), CHILD(1435),
+  CHILD(1435), CHILD(1435), CHILD(1438), CHILD(1438), CHILD(1438),
+  CHILD(1438), CHILD(1441), CHILD(1441), CHILD(1441), CHILD(1441),
+  CHILD(1444), CHILD(1444), CHILD(1444), CHILD(1444), CHILD(1447),
+  CHILD(1447), CHILD(1447), CHILD(1447), CHILD(1450), CHILD(1450),
+  CHILD(1450), CHILD(1450), CHILD(1453), CHILD(1453), CHILD(1453),
+  CHILD(1453), CHILD(1456), CHILD(1456), CHILD(1456), CHILD(1456),
+  CHILD(1459), CHILD(1459), CHILD(1459), CHILD(1459), CHILD(1151),
+  CHILD(1462), CHILD(1486), CHILD(1498), TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  BITFIELD(53, 1) /* index 1432 */,
+  TILE_OPC_RLI_SN, TILE_OPC_NONE,
+  BITFIELD(53, 1) /* index 1435 */,
+  TILE_OPC_SHLIB_SN, TILE_OPC_NONE,
+  BITFIELD(53, 1) /* index 1438 */,
+  TILE_OPC_SHLIH_SN, TILE_OPC_NONE,
+  BITFIELD(53, 1) /* index 1441 */,
+  TILE_OPC_SHLI_SN, TILE_OPC_NONE,
+  BITFIELD(53, 1) /* index 1444 */,
+  TILE_OPC_SHRIB_SN, TILE_OPC_NONE,
+  BITFIELD(53, 1) /* index 1447 */,
+  TILE_OPC_SHRIH_SN, TILE_OPC_NONE,
+  BITFIELD(53, 1) /* index 1450 */,
+  TILE_OPC_SHRI_SN, TILE_OPC_NONE,
+  BITFIELD(53, 1) /* index 1453 */,
+  TILE_OPC_SRAIB_SN, TILE_OPC_NONE,
+  BITFIELD(53, 1) /* index 1456 */,
+  TILE_OPC_SRAIH_SN, TILE_OPC_NONE,
+  BITFIELD(53, 1) /* index 1459 */,
+  TILE_OPC_SRAI_SN, TILE_OPC_NONE,
+  BITFIELD(43, 3) /* index 1462 */,
+  CHILD(1220), CHILD(1223), CHILD(1471), CHILD(1474), CHILD(1477),
+  CHILD(1480), CHILD(1483), CHILD(1256),
+  BITFIELD(53, 1) /* index 1471 */,
+  TILE_OPC_LB_SN, TILE_OPC_NONE,
+  BITFIELD(53, 1) /* index 1474 */,
+  TILE_OPC_LB_U_SN, TILE_OPC_NONE,
+  BITFIELD(53, 1) /* index 1477 */,
+  TILE_OPC_LH_SN, TILE_OPC_NONE,
+  BITFIELD(53, 1) /* index 1480 */,
+  TILE_OPC_LH_U_SN, TILE_OPC_NONE,
+  BITFIELD(53, 1) /* index 1483 */,
+  TILE_OPC_LW_SN, TILE_OPC_NONE,
+  BITFIELD(43, 3) /* index 1486 */,
+  CHILD(1268), CHILD(1271), CHILD(1274), CHILD(1277), CHILD(1280),
+  CHILD(1283), CHILD(1495), CHILD(1289),
+  BITFIELD(53, 1) /* index 1495 */,
+  TILE_OPC_TNS_SN, TILE_OPC_NONE,
+  BITFIELD(43, 2) /* index 1498 */,
+  CHILD(1503), TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  BITFIELD(45, 1) /* index 1503 */,
+  CHILD(1506), TILE_OPC_NONE,
+  BITFIELD(53, 1) /* index 1506 */,
+  TILE_OPC_LW_NA_SN, TILE_OPC_NONE,
+};
+
+static const unsigned short decode_Y0_fsm[168] =
+{
+  BITFIELD(27, 4) /* index 0 */,
+  TILE_OPC_NONE, CHILD(17), CHILD(22), CHILD(27), CHILD(47), CHILD(52),
+  CHILD(57), CHILD(62), CHILD(67), TILE_OPC_ADDI, CHILD(72), CHILD(102),
+  TILE_OPC_SEQI, CHILD(117), TILE_OPC_SLTI, TILE_OPC_SLTI_U,
+  BITFIELD(18, 2) /* index 17 */,
+  TILE_OPC_ADD, TILE_OPC_S1A, TILE_OPC_S2A, TILE_OPC_SUB,
+  BITFIELD(18, 2) /* index 22 */,
+  TILE_OPC_MNZ, TILE_OPC_MVNZ, TILE_OPC_MVZ, TILE_OPC_MZ,
+  BITFIELD(18, 2) /* index 27 */,
+  TILE_OPC_AND, TILE_OPC_NOR, CHILD(32), TILE_OPC_XOR,
+  BITFIELD(12, 2) /* index 32 */,
+  TILE_OPC_OR, TILE_OPC_OR, TILE_OPC_OR, CHILD(37),
+  BITFIELD(14, 2) /* index 37 */,
+  TILE_OPC_OR, TILE_OPC_OR, TILE_OPC_OR, CHILD(42),
+  BITFIELD(16, 2) /* index 42 */,
+  TILE_OPC_OR, TILE_OPC_OR, TILE_OPC_OR, TILE_OPC_MOVE,
+  BITFIELD(18, 2) /* index 47 */,
+  TILE_OPC_RL, TILE_OPC_SHL, TILE_OPC_SHR, TILE_OPC_SRA,
+  BITFIELD(18, 2) /* index 52 */,
+  TILE_OPC_SLTE, TILE_OPC_SLTE_U, TILE_OPC_SLT, TILE_OPC_SLT_U,
+  BITFIELD(18, 2) /* index 57 */,
+  TILE_OPC_MULHLSA_UU, TILE_OPC_S3A, TILE_OPC_SEQ, TILE_OPC_SNE,
+  BITFIELD(18, 2) /* index 62 */,
+  TILE_OPC_MULHH_SS, TILE_OPC_MULHH_UU, TILE_OPC_MULLL_SS, TILE_OPC_MULLL_UU,
+  BITFIELD(18, 2) /* index 67 */,
+  TILE_OPC_MULHHA_SS, TILE_OPC_MULHHA_UU, TILE_OPC_MULLLA_SS,
+  TILE_OPC_MULLLA_UU,
+  BITFIELD(0, 2) /* index 72 */,
+  TILE_OPC_ANDI, TILE_OPC_ANDI, TILE_OPC_ANDI, CHILD(77),
+  BITFIELD(2, 2) /* index 77 */,
+  TILE_OPC_ANDI, TILE_OPC_ANDI, TILE_OPC_ANDI, CHILD(82),
+  BITFIELD(4, 2) /* index 82 */,
+  TILE_OPC_ANDI, TILE_OPC_ANDI, TILE_OPC_ANDI, CHILD(87),
+  BITFIELD(6, 2) /* index 87 */,
+  TILE_OPC_ANDI, TILE_OPC_ANDI, TILE_OPC_ANDI, CHILD(92),
+  BITFIELD(8, 2) /* index 92 */,
+  TILE_OPC_ANDI, TILE_OPC_ANDI, TILE_OPC_ANDI, CHILD(97),
+  BITFIELD(10, 2) /* index 97 */,
+  TILE_OPC_ANDI, TILE_OPC_ANDI, TILE_OPC_ANDI, TILE_OPC_INFO,
+  BITFIELD(6, 2) /* index 102 */,
+  TILE_OPC_ORI, TILE_OPC_ORI, TILE_OPC_ORI, CHILD(107),
+  BITFIELD(8, 2) /* index 107 */,
+  TILE_OPC_ORI, TILE_OPC_ORI, TILE_OPC_ORI, CHILD(112),
+  BITFIELD(10, 2) /* index 112 */,
+  TILE_OPC_ORI, TILE_OPC_ORI, TILE_OPC_ORI, TILE_OPC_MOVEI,
+  BITFIELD(15, 5) /* index 117 */,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_RLI,
+  TILE_OPC_RLI, TILE_OPC_RLI, TILE_OPC_RLI, TILE_OPC_SHLI, TILE_OPC_SHLI,
+  TILE_OPC_SHLI, TILE_OPC_SHLI, TILE_OPC_SHRI, TILE_OPC_SHRI, TILE_OPC_SHRI,
+  TILE_OPC_SHRI, TILE_OPC_SRAI, TILE_OPC_SRAI, TILE_OPC_SRAI, TILE_OPC_SRAI,
+  CHILD(150), CHILD(159), TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE,
+  BITFIELD(12, 3) /* index 150 */,
+  TILE_OPC_NONE, TILE_OPC_BITX, TILE_OPC_BYTEX, TILE_OPC_CLZ, TILE_OPC_CTZ,
+  TILE_OPC_FNOP, TILE_OPC_NOP, TILE_OPC_PCNT,
+  BITFIELD(12, 3) /* index 159 */,
+  TILE_OPC_TBLIDXB0, TILE_OPC_TBLIDXB1, TILE_OPC_TBLIDXB2, TILE_OPC_TBLIDXB3,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+};
+
+static const unsigned short decode_Y1_fsm[140] =
+{
+  BITFIELD(59, 4) /* index 0 */,
+  TILE_OPC_NONE, CHILD(17), CHILD(22), CHILD(27), CHILD(47), CHILD(52),
+  CHILD(57), TILE_OPC_ADDI, CHILD(62), CHILD(92), TILE_OPC_SEQI, CHILD(107),
+  TILE_OPC_SLTI, TILE_OPC_SLTI_U, TILE_OPC_NONE, TILE_OPC_NONE,
+  BITFIELD(49, 2) /* index 17 */,
+  TILE_OPC_ADD, TILE_OPC_S1A, TILE_OPC_S2A, TILE_OPC_SUB,
+  BITFIELD(49, 2) /* index 22 */,
+  TILE_OPC_NONE, TILE_OPC_MNZ, TILE_OPC_MZ, TILE_OPC_NONE,
+  BITFIELD(49, 2) /* index 27 */,
+  TILE_OPC_AND, TILE_OPC_NOR, CHILD(32), TILE_OPC_XOR,
+  BITFIELD(43, 2) /* index 32 */,
+  TILE_OPC_OR, TILE_OPC_OR, TILE_OPC_OR, CHILD(37),
+  BITFIELD(45, 2) /* index 37 */,
+  TILE_OPC_OR, TILE_OPC_OR, TILE_OPC_OR, CHILD(42),
+  BITFIELD(47, 2) /* index 42 */,
+  TILE_OPC_OR, TILE_OPC_OR, TILE_OPC_OR, TILE_OPC_MOVE,
+  BITFIELD(49, 2) /* index 47 */,
+  TILE_OPC_RL, TILE_OPC_SHL, TILE_OPC_SHR, TILE_OPC_SRA,
+  BITFIELD(49, 2) /* index 52 */,
+  TILE_OPC_SLTE, TILE_OPC_SLTE_U, TILE_OPC_SLT, TILE_OPC_SLT_U,
+  BITFIELD(49, 2) /* index 57 */,
+  TILE_OPC_NONE, TILE_OPC_S3A, TILE_OPC_SEQ, TILE_OPC_SNE,
+  BITFIELD(31, 2) /* index 62 */,
+  TILE_OPC_ANDI, TILE_OPC_ANDI, TILE_OPC_ANDI, CHILD(67),
+  BITFIELD(33, 2) /* index 67 */,
+  TILE_OPC_ANDI, TILE_OPC_ANDI, TILE_OPC_ANDI, CHILD(72),
+  BITFIELD(35, 2) /* index 72 */,
+  TILE_OPC_ANDI, TILE_OPC_ANDI, TILE_OPC_ANDI, CHILD(77),
+  BITFIELD(37, 2) /* index 77 */,
+  TILE_OPC_ANDI, TILE_OPC_ANDI, TILE_OPC_ANDI, CHILD(82),
+  BITFIELD(39, 2) /* index 82 */,
+  TILE_OPC_ANDI, TILE_OPC_ANDI, TILE_OPC_ANDI, CHILD(87),
+  BITFIELD(41, 2) /* index 87 */,
+  TILE_OPC_ANDI, TILE_OPC_ANDI, TILE_OPC_ANDI, TILE_OPC_INFO,
+  BITFIELD(37, 2) /* index 92 */,
+  TILE_OPC_ORI, TILE_OPC_ORI, TILE_OPC_ORI, CHILD(97),
+  BITFIELD(39, 2) /* index 97 */,
+  TILE_OPC_ORI, TILE_OPC_ORI, TILE_OPC_ORI, CHILD(102),
+  BITFIELD(41, 2) /* index 102 */,
+  TILE_OPC_ORI, TILE_OPC_ORI, TILE_OPC_ORI, TILE_OPC_MOVEI,
+  BITFIELD(48, 3) /* index 107 */,
+  TILE_OPC_NONE, TILE_OPC_RLI, TILE_OPC_SHLI, TILE_OPC_SHRI, TILE_OPC_SRAI,
+  CHILD(116), TILE_OPC_NONE, TILE_OPC_NONE,
+  BITFIELD(43, 3) /* index 116 */,
+  TILE_OPC_NONE, CHILD(125), CHILD(130), CHILD(135), TILE_OPC_NONE,
+  TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  BITFIELD(46, 2) /* index 125 */,
+  TILE_OPC_FNOP, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  BITFIELD(46, 2) /* index 130 */,
+  TILE_OPC_ILL, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+  BITFIELD(46, 2) /* index 135 */,
+  TILE_OPC_NOP, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE,
+};
+
+static const unsigned short decode_Y2_fsm[24] =
+{
+  BITFIELD(56, 3) /* index 0 */,
+  CHILD(9), TILE_OPC_LB_U, TILE_OPC_LH, TILE_OPC_LH_U, TILE_OPC_LW,
+  TILE_OPC_SB, TILE_OPC_SH, TILE_OPC_SW,
+  BITFIELD(20, 2) /* index 9 */,
+  TILE_OPC_LB, TILE_OPC_LB, TILE_OPC_LB, CHILD(14),
+  BITFIELD(22, 2) /* index 14 */,
+  TILE_OPC_LB, TILE_OPC_LB, TILE_OPC_LB, CHILD(19),
+  BITFIELD(24, 2) /* index 19 */,
+  TILE_OPC_LB, TILE_OPC_LB, TILE_OPC_LB, TILE_OPC_PREFETCH,
+};
+
+#undef BITFIELD
+#undef CHILD
+const unsigned short * const
+tile_bundle_decoder_fsms[TILE_NUM_PIPELINE_ENCODINGS] =
+{
+  decode_X0_fsm,
+  decode_X1_fsm,
+  decode_Y0_fsm,
+  decode_Y1_fsm,
+  decode_Y2_fsm
+};
+const struct tile_sn_opcode tile_sn_opcodes[23] =
+{
+ { "bz", TILE_SN_OPC_BZ,
+    1 /* num_operands */,
+    /* operands */
+    { 38 },
+    /* fixed_bit_mask */
+    0xfc00,
+    /* fixed_bit_value */
+    0xe000
+  },
+  { "bnz", TILE_SN_OPC_BNZ,
+    1 /* num_operands */,
+    /* operands */
+    { 38 },
+    /* fixed_bit_mask */
+    0xfc00,
+    /* fixed_bit_value */
+    0xe400
+  },
+  { "jrr", TILE_SN_OPC_JRR,
+    1 /* num_operands */,
+    /* operands */
+    { 39 },
+    /* fixed_bit_mask */
+    0xff00,
+    /* fixed_bit_value */
+    0x0600
+  },
+  { "fnop", TILE_SN_OPC_FNOP,
+    0 /* num_operands */,
+    /* operands */
+    { 0, },
+    /* fixed_bit_mask */
+    0xffff,
+    /* fixed_bit_value */
+    0x0003
+  },
+  { "blz", TILE_SN_OPC_BLZ,
+    1 /* num_operands */,
+    /* operands */
+    { 38 },
+    /* fixed_bit_mask */
+    0xfc00,
+    /* fixed_bit_value */
+    0xf000
+  },
+  { "nop", TILE_SN_OPC_NOP,
+    0 /* num_operands */,
+    /* operands */
+    { 0, },
+    /* fixed_bit_mask */
+    0xffff,
+    /* fixed_bit_value */
+    0x0002
+  },
+  { "movei", TILE_SN_OPC_MOVEI,
+    1 /* num_operands */,
+    /* operands */
+    { 40 },
+    /* fixed_bit_mask */
+    0xff00,
+    /* fixed_bit_value */
+    0x0400
+  },
+  { "move", TILE_SN_OPC_MOVE,
+    2 /* num_operands */,
+    /* operands */
+    { 41, 42 },
+    /* fixed_bit_mask */
+    0xfff0,
+    /* fixed_bit_value */
+    0x0080
+  },
+  { "bgez", TILE_SN_OPC_BGEZ,
+    1 /* num_operands */,
+    /* operands */
+    { 38 },
+    /* fixed_bit_mask */
+    0xfc00,
+    /* fixed_bit_value */
+    0xf400
+  },
+  { "jr", TILE_SN_OPC_JR,
+    1 /* num_operands */,
+    /* operands */
+    { 42 },
+    /* fixed_bit_mask */
+    0xfff0,
+    /* fixed_bit_value */
+    0x0040
+  },
+  { "blez", TILE_SN_OPC_BLEZ,
+    1 /* num_operands */,
+    /* operands */
+    { 38 },
+    /* fixed_bit_mask */
+    0xfc00,
+    /* fixed_bit_value */
+    0xec00
+  },
+  { "bbns", TILE_SN_OPC_BBNS,
+    1 /* num_operands */,
+    /* operands */
+    { 38 },
+    /* fixed_bit_mask */
+    0xfc00,
+    /* fixed_bit_value */
+    0xfc00
+  },
+  { "jalrr", TILE_SN_OPC_JALRR,
+    1 /* num_operands */,
+    /* operands */
+    { 39 },
+    /* fixed_bit_mask */
+    0xff00,
+    /* fixed_bit_value */
+    0x0700
+  },
+  { "bpt", TILE_SN_OPC_BPT,
+    0 /* num_operands */,
+    /* operands */
+    { 0, },
+    /* fixed_bit_mask */
+    0xffff,
+    /* fixed_bit_value */
+    0x0001
+  },
+  { "jalr", TILE_SN_OPC_JALR,
+    1 /* num_operands */,
+    /* operands */
+    { 42 },
+    /* fixed_bit_mask */
+    0xfff0,
+    /* fixed_bit_value */
+    0x0050
+  },
+  { "shr1", TILE_SN_OPC_SHR1,
+    2 /* num_operands */,
+    /* operands */
+    { 41, 42 },
+    /* fixed_bit_mask */
+    0xfff0,
+    /* fixed_bit_value */
+    0x0090
+  },
+  { "bgz", TILE_SN_OPC_BGZ,
+    1 /* num_operands */,
+    /* operands */
+    { 38 },
+    /* fixed_bit_mask */
+    0xfc00,
+    /* fixed_bit_value */
+    0xe800
+  },
+  { "bbs", TILE_SN_OPC_BBS,
+    1 /* num_operands */,
+    /* operands */
+    { 38 },
+    /* fixed_bit_mask */
+    0xfc00,
+    /* fixed_bit_value */
+    0xf800
+  },
+  { "shl8ii", TILE_SN_OPC_SHL8II,
+    1 /* num_operands */,
+    /* operands */
+    { 39 },
+    /* fixed_bit_mask */
+    0xff00,
+    /* fixed_bit_value */
+    0x0300
+  },
+  { "addi", TILE_SN_OPC_ADDI,
+    1 /* num_operands */,
+    /* operands */
+    { 40 },
+    /* fixed_bit_mask */
+    0xff00,
+    /* fixed_bit_value */
+    0x0500
+  },
+  { "halt", TILE_SN_OPC_HALT,
+    0 /* num_operands */,
+    /* operands */
+    { 0, },
+    /* fixed_bit_mask */
+    0xffff,
+    /* fixed_bit_value */
+    0x0000
+  },
+  { "route", TILE_SN_OPC_ROUTE, 0, { 0, }, 0, 0,
+  },
+  { 0, TILE_SN_OPC_NONE, 0, { 0, }, 0, 0,
+  }
+};
+const unsigned char tile_sn_route_encode[6 * 6 * 6] =
+{
+  0xdf,
+  0xde,
+  0xdd,
+  0xdc,
+  0xdb,
+  0xda,
+  0xb9,
+  0xb8,
+  0xa1,
+  0xa0,
+  0x11,
+  0x10,
+  0x9f,
+  0x9e,
+  0x9d,
+  0x9c,
+  0x9b,
+  0x9a,
+  0x79,
+  0x78,
+  0x61,
+  0x60,
+  0xb,
+  0xa,
+  0x5f,
+  0x5e,
+  0x5d,
+  0x5c,
+  0x5b,
+  0x5a,
+  0x1f,
+  0x1e,
+  0x1d,
+  0x1c,
+  0x1b,
+  0x1a,
+  0xd7,
+  0xd6,
+  0xd5,
+  0xd4,
+  0xd3,
+  0xd2,
+  0xa7,
+  0xa6,
+  0xb1,
+  0xb0,
+  0x13,
+  0x12,
+  0x97,
+  0x96,
+  0x95,
+  0x94,
+  0x93,
+  0x92,
+  0x67,
+  0x66,
+  0x71,
+  0x70,
+  0x9,
+  0x8,
+  0x57,
+  0x56,
+  0x55,
+  0x54,
+  0x53,
+  0x52,
+  0x17,
+  0x16,
+  0x15,
+  0x14,
+  0x19,
+  0x18,
+  0xcf,
+  0xce,
+  0xcd,
+  0xcc,
+  0xcb,
+  0xca,
+  0xaf,
+  0xae,
+  0xad,
+  0xac,
+  0xab,
+  0xaa,
+  0x8f,
+  0x8e,
+  0x8d,
+  0x8c,
+  0x8b,
+  0x8a,
+  0x6f,
+  0x6e,
+  0x6d,
+  0x6c,
+  0x6b,
+  0x6a,
+  0x4f,
+  0x4e,
+  0x4d,
+  0x4c,
+  0x4b,
+  0x4a,
+  0x2f,
+  0x2e,
+  0x2d,
+  0x2c,
+  0x2b,
+  0x2a,
+  0xc9,
+  0xc8,
+  0xc5,
+  0xc4,
+  0xc3,
+  0xc2,
+  0xa9,
+  0xa8,
+  0xa5,
+  0xa4,
+  0xa3,
+  0xa2,
+  0x89,
+  0x88,
+  0x85,
+  0x84,
+  0x83,
+  0x82,
+  0x69,
+  0x68,
+  0x65,
+  0x64,
+  0x63,
+  0x62,
+  0x47,
+  0x46,
+  0x45,
+  0x44,
+  0x43,
+  0x42,
+  0x27,
+  0x26,
+  0x25,
+  0x24,
+  0x23,
+  0x22,
+  0xd9,
+  0xd8,
+  0xc1,
+  0xc0,
+  0x3b,
+  0x3a,
+  0xbf,
+  0xbe,
+  0xbd,
+  0xbc,
+  0xbb,
+  0xba,
+  0x99,
+  0x98,
+  0x81,
+  0x80,
+  0x31,
+  0x30,
+  0x7f,
+  0x7e,
+  0x7d,
+  0x7c,
+  0x7b,
+  0x7a,
+  0x59,
+  0x58,
+  0x3d,
+  0x3c,
+  0x49,
+  0x48,
+  0xf,
+  0xe,
+  0xd,
+  0xc,
+  0x29,
+  0x28,
+  0xc7,
+  0xc6,
+  0xd1,
+  0xd0,
+  0x39,
+  0x38,
+  0xb7,
+  0xb6,
+  0xb5,
+  0xb4,
+  0xb3,
+  0xb2,
+  0x87,
+  0x86,
+  0x91,
+  0x90,
+  0x33,
+  0x32,
+  0x77,
+  0x76,
+  0x75,
+  0x74,
+  0x73,
+  0x72,
+  0x3f,
+  0x3e,
+  0x51,
+  0x50,
+  0x41,
+  0x40,
+  0x37,
+  0x36,
+  0x35,
+  0x34,
+  0x21,
+  0x20
+};
+
+const signed char tile_sn_route_decode[256][3] =
+{
+  { -1, -1, -1 },
+  { -1, -1, -1 },
+  { -1, -1, -1 },
+  { -1, -1, -1 },
+  { -1, -1, -1 },
+  { -1, -1, -1 },
+  { -1, -1, -1 },
+  { -1, -1, -1 },
+  { 5, 3, 1 },
+  { 4, 3, 1 },
+  { 5, 3, 0 },
+  { 4, 3, 0 },
+  { 3, 5, 4 },
+  { 2, 5, 4 },
+  { 1, 5, 4 },
+  { 0, 5, 4 },
+  { 5, 1, 0 },
+  { 4, 1, 0 },
+  { 5, 1, 1 },
+  { 4, 1, 1 },
+  { 3, 5, 1 },
+  { 2, 5, 1 },
+  { 1, 5, 1 },
+  { 0, 5, 1 },
+  { 5, 5, 1 },
+  { 4, 5, 1 },
+  { 5, 5, 0 },
+  { 4, 5, 0 },
+  { 3, 5, 0 },
+  { 2, 5, 0 },
+  { 1, 5, 0 },
+  { 0, 5, 0 },
+  { 5, 5, 5 },
+  { 4, 5, 5 },
+  { 5, 5, 3 },
+  { 4, 5, 3 },
+  { 3, 5, 3 },
+  { 2, 5, 3 },
+  { 1, 5, 3 },
+  { 0, 5, 3 },
+  { 5, 5, 4 },
+  { 4, 5, 4 },
+  { 5, 5, 2 },
+  { 4, 5, 2 },
+  { 3, 5, 2 },
+  { 2, 5, 2 },
+  { 1, 5, 2 },
+  { 0, 5, 2 },
+  { 5, 2, 4 },
+  { 4, 2, 4 },
+  { 5, 2, 5 },
+  { 4, 2, 5 },
+  { 3, 5, 5 },
+  { 2, 5, 5 },
+  { 1, 5, 5 },
+  { 0, 5, 5 },
+  { 5, 0, 5 },
+  { 4, 0, 5 },
+  { 5, 0, 4 },
+  { 4, 0, 4 },
+  { 3, 4, 4 },
+  { 2, 4, 4 },
+  { 1, 4, 5 },
+  { 0, 4, 5 },
+  { 5, 4, 5 },
+  { 4, 4, 5 },
+  { 5, 4, 3 },
+  { 4, 4, 3 },
+  { 3, 4, 3 },
+  { 2, 4, 3 },
+  { 1, 4, 3 },
+  { 0, 4, 3 },
+  { 5, 4, 4 },
+  { 4, 4, 4 },
+  { 5, 4, 2 },
+  { 4, 4, 2 },
+  { 3, 4, 2 },
+  { 2, 4, 2 },
+  { 1, 4, 2 },
+  { 0, 4, 2 },
+  { 3, 4, 5 },
+  { 2, 4, 5 },
+  { 5, 4, 1 },
+  { 4, 4, 1 },
+  { 3, 4, 1 },
+  { 2, 4, 1 },
+  { 1, 4, 1 },
+  { 0, 4, 1 },
+  { 1, 4, 4 },
+  { 0, 4, 4 },
+  { 5, 4, 0 },
+  { 4, 4, 0 },
+  { 3, 4, 0 },
+  { 2, 4, 0 },
+  { 1, 4, 0 },
+  { 0, 4, 0 },
+  { 3, 3, 0 },
+  { 2, 3, 0 },
+  { 5, 3, 3 },
+  { 4, 3, 3 },
+  { 3, 3, 3 },
+  { 2, 3, 3 },
+  { 1, 3, 1 },
+  { 0, 3, 1 },
+  { 1, 3, 3 },
+  { 0, 3, 3 },
+  { 5, 3, 2 },
+  { 4, 3, 2 },
+  { 3, 3, 2 },
+  { 2, 3, 2 },
+  { 1, 3, 2 },
+  { 0, 3, 2 },
+  { 3, 3, 1 },
+  { 2, 3, 1 },
+  { 5, 3, 5 },
+  { 4, 3, 5 },
+  { 3, 3, 5 },
+  { 2, 3, 5 },
+  { 1, 3, 5 },
+  { 0, 3, 5 },
+  { 1, 3, 0 },
+  { 0, 3, 0 },
+  { 5, 3, 4 },
+  { 4, 3, 4 },
+  { 3, 3, 4 },
+  { 2, 3, 4 },
+  { 1, 3, 4 },
+  { 0, 3, 4 },
+  { 3, 2, 4 },
+  { 2, 2, 4 },
+  { 5, 2, 3 },
+  { 4, 2, 3 },
+  { 3, 2, 3 },
+  { 2, 2, 3 },
+  { 1, 2, 5 },
+  { 0, 2, 5 },
+  { 1, 2, 3 },
+  { 0, 2, 3 },
+  { 5, 2, 2 },
+  { 4, 2, 2 },
+  { 3, 2, 2 },
+  { 2, 2, 2 },
+  { 1, 2, 2 },
+  { 0, 2, 2 },
+  { 3, 2, 5 },
+  { 2, 2, 5 },
+  { 5, 2, 1 },
+  { 4, 2, 1 },
+  { 3, 2, 1 },
+  { 2, 2, 1 },
+  { 1, 2, 1 },
+  { 0, 2, 1 },
+  { 1, 2, 4 },
+  { 0, 2, 4 },
+  { 5, 2, 0 },
+  { 4, 2, 0 },
+  { 3, 2, 0 },
+  { 2, 2, 0 },
+  { 1, 2, 0 },
+  { 0, 2, 0 },
+  { 3, 1, 0 },
+  { 2, 1, 0 },
+  { 5, 1, 3 },
+  { 4, 1, 3 },
+  { 3, 1, 3 },
+  { 2, 1, 3 },
+  { 1, 1, 1 },
+  { 0, 1, 1 },
+  { 1, 1, 3 },
+  { 0, 1, 3 },
+  { 5, 1, 2 },
+  { 4, 1, 2 },
+  { 3, 1, 2 },
+  { 2, 1, 2 },
+  { 1, 1, 2 },
+  { 0, 1, 2 },
+  { 3, 1, 1 },
+  { 2, 1, 1 },
+  { 5, 1, 5 },
+  { 4, 1, 5 },
+  { 3, 1, 5 },
+  { 2, 1, 5 },
+  { 1, 1, 5 },
+  { 0, 1, 5 },
+  { 1, 1, 0 },
+  { 0, 1, 0 },
+  { 5, 1, 4 },
+  { 4, 1, 4 },
+  { 3, 1, 4 },
+  { 2, 1, 4 },
+  { 1, 1, 4 },
+  { 0, 1, 4 },
+  { 3, 0, 4 },
+  { 2, 0, 4 },
+  { 5, 0, 3 },
+  { 4, 0, 3 },
+  { 3, 0, 3 },
+  { 2, 0, 3 },
+  { 1, 0, 5 },
+  { 0, 0, 5 },
+  { 1, 0, 3 },
+  { 0, 0, 3 },
+  { 5, 0, 2 },
+  { 4, 0, 2 },
+  { 3, 0, 2 },
+  { 2, 0, 2 },
+  { 1, 0, 2 },
+  { 0, 0, 2 },
+  { 3, 0, 5 },
+  { 2, 0, 5 },
+  { 5, 0, 1 },
+  { 4, 0, 1 },
+  { 3, 0, 1 },
+  { 2, 0, 1 },
+  { 1, 0, 1 },
+  { 0, 0, 1 },
+  { 1, 0, 4 },
+  { 0, 0, 4 },
+  { 5, 0, 0 },
+  { 4, 0, 0 },
+  { 3, 0, 0 },
+  { 2, 0, 0 },
+  { 1, 0, 0 },
+  { 0, 0, 0 },
+  { -1, -1, -1 },
+  { -1, -1, -1 },
+  { -1, -1, -1 },
+  { -1, -1, -1 },
+  { -1, -1, -1 },
+  { -1, -1, -1 },
+  { -1, -1, -1 },
+  { -1, -1, -1 },
+  { -1, -1, -1 },
+  { -1, -1, -1 },
+  { -1, -1, -1 },
+  { -1, -1, -1 },
+  { -1, -1, -1 },
+  { -1, -1, -1 },
+  { -1, -1, -1 },
+  { -1, -1, -1 },
+  { -1, -1, -1 },
+  { -1, -1, -1 },
+  { -1, -1, -1 },
+  { -1, -1, -1 },
+  { -1, -1, -1 },
+  { -1, -1, -1 },
+  { -1, -1, -1 },
+  { -1, -1, -1 },
+  { -1, -1, -1 },
+  { -1, -1, -1 },
+  { -1, -1, -1 },
+  { -1, -1, -1 },
+  { -1, -1, -1 },
+  { -1, -1, -1 },
+  { -1, -1, -1 },
+  { -1, -1, -1 }
+};
+
+const char tile_sn_direction_names[6][5] =
+{
+  "w",
+  "c",
+  "acc",
+  "n",
+  "e",
+  "s"
+};
+
+const signed char tile_sn_dest_map[6][6] = {
+  { -1, 3, 4, 5, 1, 2 } /* val -> w */,
+  { -1, 3, 4, 5, 0, 2 } /* val -> c */,
+  { -1, 3, 4, 5, 0, 1 } /* val -> acc */,
+  { -1, 4, 5, 0, 1, 2 } /* val -> n */,
+  { -1, 3, 5, 0, 1, 2 } /* val -> e */,
+  { -1, 3, 4, 0, 1, 2 } /* val -> s */
+};
+
+const struct tile_operand tile_operands[43] =
+{
+  {
+    TILE_OP_TYPE_IMMEDIATE, /* type */
+    MAYBE_BFD_RELOC(BFD_RELOC_TILE_IMM8_X0), /* default_reloc */
+    8, /* num_bits */
+    1, /* is_signed */
+    0, /* is_src_reg */
+    0, /* is_dest_reg */
+    0, /* is_pc_relative */
+    0, /* rightshift */
+    create_Imm8_X0, /* insert */
+    get_Imm8_X0  /* extract */
+  },
+  {
+    TILE_OP_TYPE_IMMEDIATE, /* type */
+    MAYBE_BFD_RELOC(BFD_RELOC_TILE_IMM8_X1), /* default_reloc */
+    8, /* num_bits */
+    1, /* is_signed */
+    0, /* is_src_reg */
+    0, /* is_dest_reg */
+    0, /* is_pc_relative */
+    0, /* rightshift */
+    create_Imm8_X1, /* insert */
+    get_Imm8_X1  /* extract */
+  },
+  {
+    TILE_OP_TYPE_IMMEDIATE, /* type */
+    MAYBE_BFD_RELOC(BFD_RELOC_TILE_IMM8_Y0), /* default_reloc */
+    8, /* num_bits */
+    1, /* is_signed */
+    0, /* is_src_reg */
+    0, /* is_dest_reg */
+    0, /* is_pc_relative */
+    0, /* rightshift */
+    create_Imm8_Y0, /* insert */
+    get_Imm8_Y0  /* extract */
+  },
+  {
+    TILE_OP_TYPE_IMMEDIATE, /* type */
+    MAYBE_BFD_RELOC(BFD_RELOC_TILE_IMM8_Y1), /* default_reloc */
+    8, /* num_bits */
+    1, /* is_signed */
+    0, /* is_src_reg */
+    0, /* is_dest_reg */
+    0, /* is_pc_relative */
+    0, /* rightshift */
+    create_Imm8_Y1, /* insert */
+    get_Imm8_Y1  /* extract */
+  },
+  {
+    TILE_OP_TYPE_IMMEDIATE, /* type */
+    MAYBE_BFD_RELOC(BFD_RELOC_TILE_IMM16_X0), /* default_reloc */
+    16, /* num_bits */
+    1, /* is_signed */
+    0, /* is_src_reg */
+    0, /* is_dest_reg */
+    0, /* is_pc_relative */
+    0, /* rightshift */
+    create_Imm16_X0, /* insert */
+    get_Imm16_X0  /* extract */
+  },
+  {
+    TILE_OP_TYPE_IMMEDIATE, /* type */
+    MAYBE_BFD_RELOC(BFD_RELOC_TILE_IMM16_X1), /* default_reloc */
+    16, /* num_bits */
+    1, /* is_signed */
+    0, /* is_src_reg */
+    0, /* is_dest_reg */
+    0, /* is_pc_relative */
+    0, /* rightshift */
+    create_Imm16_X1, /* insert */
+    get_Imm16_X1  /* extract */
+  },
+  {
+    TILE_OP_TYPE_ADDRESS, /* type */
+    MAYBE_BFD_RELOC(BFD_RELOC_TILE_JOFFLONG_X1), /* default_reloc */
+    29, /* num_bits */
+    1, /* is_signed */
+    0, /* is_src_reg */
+    0, /* is_dest_reg */
+    1, /* is_pc_relative */
+    TILE_LOG2_BUNDLE_ALIGNMENT_IN_BYTES, /* rightshift */
+    create_JOffLong_X1, /* insert */
+    get_JOffLong_X1  /* extract */
+  },
+  {
+    TILE_OP_TYPE_REGISTER, /* type */
+    MAYBE_BFD_RELOC(BFD_RELOC_NONE), /* default_reloc */
+    6, /* num_bits */
+    0, /* is_signed */
+    0, /* is_src_reg */
+    1, /* is_dest_reg */
+    0, /* is_pc_relative */
+    0, /* rightshift */
+    create_Dest_X0, /* insert */
+    get_Dest_X0  /* extract */
+  },
+  {
+    TILE_OP_TYPE_REGISTER, /* type */
+    MAYBE_BFD_RELOC(BFD_RELOC_NONE), /* default_reloc */
+    6, /* num_bits */
+    0, /* is_signed */
+    1, /* is_src_reg */
+    0, /* is_dest_reg */
+    0, /* is_pc_relative */
+    0, /* rightshift */
+    create_SrcA_X0, /* insert */
+    get_SrcA_X0  /* extract */
+  },
+  {
+    TILE_OP_TYPE_REGISTER, /* type */
+    MAYBE_BFD_RELOC(BFD_RELOC_NONE), /* default_reloc */
+    6, /* num_bits */
+    0, /* is_signed */
+    0, /* is_src_reg */
+    1, /* is_dest_reg */
+    0, /* is_pc_relative */
+    0, /* rightshift */
+    create_Dest_X1, /* insert */
+    get_Dest_X1  /* extract */
+  },
+  {
+    TILE_OP_TYPE_REGISTER, /* type */
+    MAYBE_BFD_RELOC(BFD_RELOC_NONE), /* default_reloc */
+    6, /* num_bits */
+    0, /* is_signed */
+    1, /* is_src_reg */
+    0, /* is_dest_reg */
+    0, /* is_pc_relative */
+    0, /* rightshift */
+    create_SrcA_X1, /* insert */
+    get_SrcA_X1  /* extract */
+  },
+  {
+    TILE_OP_TYPE_REGISTER, /* type */
+    MAYBE_BFD_RELOC(BFD_RELOC_NONE), /* default_reloc */
+    6, /* num_bits */
+    0, /* is_signed */
+    0, /* is_src_reg */
+    1, /* is_dest_reg */
+    0, /* is_pc_relative */
+    0, /* rightshift */
+    create_Dest_Y0, /* insert */
+    get_Dest_Y0  /* extract */
+  },
+  {
+    TILE_OP_TYPE_REGISTER, /* type */
+    MAYBE_BFD_RELOC(BFD_RELOC_NONE), /* default_reloc */
+    6, /* num_bits */
+    0, /* is_signed */
+    1, /* is_src_reg */
+    0, /* is_dest_reg */
+    0, /* is_pc_relative */
+    0, /* rightshift */
+    create_SrcA_Y0, /* insert */
+    get_SrcA_Y0  /* extract */
+  },
+  {
+    TILE_OP_TYPE_REGISTER, /* type */
+    MAYBE_BFD_RELOC(BFD_RELOC_NONE), /* default_reloc */
+    6, /* num_bits */
+    0, /* is_signed */
+    0, /* is_src_reg */
+    1, /* is_dest_reg */
+    0, /* is_pc_relative */
+    0, /* rightshift */
+    create_Dest_Y1, /* insert */
+    get_Dest_Y1  /* extract */
+  },
+  {
+    TILE_OP_TYPE_REGISTER, /* type */
+    MAYBE_BFD_RELOC(BFD_RELOC_NONE), /* default_reloc */
+    6, /* num_bits */
+    0, /* is_signed */
+    1, /* is_src_reg */
+    0, /* is_dest_reg */
+    0, /* is_pc_relative */
+    0, /* rightshift */
+    create_SrcA_Y1, /* insert */
+    get_SrcA_Y1  /* extract */
+  },
+  {
+    TILE_OP_TYPE_REGISTER, /* type */
+    MAYBE_BFD_RELOC(BFD_RELOC_NONE), /* default_reloc */
+    6, /* num_bits */
+    0, /* is_signed */
+    1, /* is_src_reg */
+    0, /* is_dest_reg */
+    0, /* is_pc_relative */
+    0, /* rightshift */
+    create_SrcA_Y2, /* insert */
+    get_SrcA_Y2  /* extract */
+  },
+  {
+    TILE_OP_TYPE_REGISTER, /* type */
+    MAYBE_BFD_RELOC(BFD_RELOC_NONE), /* default_reloc */
+    6, /* num_bits */
+    0, /* is_signed */
+    1, /* is_src_reg */
+    0, /* is_dest_reg */
+    0, /* is_pc_relative */
+    0, /* rightshift */
+    create_SrcB_X0, /* insert */
+    get_SrcB_X0  /* extract */
+  },
+  {
+    TILE_OP_TYPE_REGISTER, /* type */
+    MAYBE_BFD_RELOC(BFD_RELOC_NONE), /* default_reloc */
+    6, /* num_bits */
+    0, /* is_signed */
+    1, /* is_src_reg */
+    0, /* is_dest_reg */
+    0, /* is_pc_relative */
+    0, /* rightshift */
+    create_SrcB_X1, /* insert */
+    get_SrcB_X1  /* extract */
+  },
+  {
+    TILE_OP_TYPE_REGISTER, /* type */
+    MAYBE_BFD_RELOC(BFD_RELOC_NONE), /* default_reloc */
+    6, /* num_bits */
+    0, /* is_signed */
+    1, /* is_src_reg */
+    0, /* is_dest_reg */
+    0, /* is_pc_relative */
+    0, /* rightshift */
+    create_SrcB_Y0, /* insert */
+    get_SrcB_Y0  /* extract */
+  },
+  {
+    TILE_OP_TYPE_REGISTER, /* type */
+    MAYBE_BFD_RELOC(BFD_RELOC_NONE), /* default_reloc */
+    6, /* num_bits */
+    0, /* is_signed */
+    1, /* is_src_reg */
+    0, /* is_dest_reg */
+    0, /* is_pc_relative */
+    0, /* rightshift */
+    create_SrcB_Y1, /* insert */
+    get_SrcB_Y1  /* extract */
+  },
+  {
+    TILE_OP_TYPE_ADDRESS, /* type */
+    MAYBE_BFD_RELOC(BFD_RELOC_TILE_BROFF_X1), /* default_reloc */
+    17, /* num_bits */
+    1, /* is_signed */
+    0, /* is_src_reg */
+    0, /* is_dest_reg */
+    1, /* is_pc_relative */
+    TILE_LOG2_BUNDLE_ALIGNMENT_IN_BYTES, /* rightshift */
+    create_BrOff_X1, /* insert */
+    get_BrOff_X1  /* extract */
+  },
+  {
+    TILE_OP_TYPE_REGISTER, /* type */
+    MAYBE_BFD_RELOC(BFD_RELOC_NONE), /* default_reloc */
+    6, /* num_bits */
+    0, /* is_signed */
+    1, /* is_src_reg */
+    1, /* is_dest_reg */
+    0, /* is_pc_relative */
+    0, /* rightshift */
+    create_Dest_X0, /* insert */
+    get_Dest_X0  /* extract */
+  },
+  {
+    TILE_OP_TYPE_ADDRESS, /* type */
+    MAYBE_BFD_RELOC(BFD_RELOC_NONE), /* default_reloc */
+    28, /* num_bits */
+    1, /* is_signed */
+    0, /* is_src_reg */
+    0, /* is_dest_reg */
+    1, /* is_pc_relative */
+    TILE_LOG2_BUNDLE_ALIGNMENT_IN_BYTES, /* rightshift */
+    create_JOff_X1, /* insert */
+    get_JOff_X1  /* extract */
+  },
+  {
+    TILE_OP_TYPE_REGISTER, /* type */
+    MAYBE_BFD_RELOC(BFD_RELOC_NONE), /* default_reloc */
+    6, /* num_bits */
+    0, /* is_signed */
+    0, /* is_src_reg */
+    1, /* is_dest_reg */
+    0, /* is_pc_relative */
+    0, /* rightshift */
+    create_SrcBDest_Y2, /* insert */
+    get_SrcBDest_Y2  /* extract */
+  },
+  {
+    TILE_OP_TYPE_REGISTER, /* type */
+    MAYBE_BFD_RELOC(BFD_RELOC_NONE), /* default_reloc */
+    6, /* num_bits */
+    0, /* is_signed */
+    1, /* is_src_reg */
+    1, /* is_dest_reg */
+    0, /* is_pc_relative */
+    0, /* rightshift */
+    create_SrcA_X1, /* insert */
+    get_SrcA_X1  /* extract */
+  },
+  {
+    TILE_OP_TYPE_SPR, /* type */
+    MAYBE_BFD_RELOC(BFD_RELOC_TILE_MF_IMM15_X1), /* default_reloc */
+    15, /* num_bits */
+    0, /* is_signed */
+    0, /* is_src_reg */
+    0, /* is_dest_reg */
+    0, /* is_pc_relative */
+    0, /* rightshift */
+    create_MF_Imm15_X1, /* insert */
+    get_MF_Imm15_X1  /* extract */
+  },
+  {
+    TILE_OP_TYPE_IMMEDIATE, /* type */
+    MAYBE_BFD_RELOC(BFD_RELOC_TILE_MMSTART_X0), /* default_reloc */
+    5, /* num_bits */
+    0, /* is_signed */
+    0, /* is_src_reg */
+    0, /* is_dest_reg */
+    0, /* is_pc_relative */
+    0, /* rightshift */
+    create_MMStart_X0, /* insert */
+    get_MMStart_X0  /* extract */
+  },
+  {
+    TILE_OP_TYPE_IMMEDIATE, /* type */
+    MAYBE_BFD_RELOC(BFD_RELOC_TILE_MMEND_X0), /* default_reloc */
+    5, /* num_bits */
+    0, /* is_signed */
+    0, /* is_src_reg */
+    0, /* is_dest_reg */
+    0, /* is_pc_relative */
+    0, /* rightshift */
+    create_MMEnd_X0, /* insert */
+    get_MMEnd_X0  /* extract */
+  },
+  {
+    TILE_OP_TYPE_IMMEDIATE, /* type */
+    MAYBE_BFD_RELOC(BFD_RELOC_TILE_MMSTART_X1), /* default_reloc */
+    5, /* num_bits */
+    0, /* is_signed */
+    0, /* is_src_reg */
+    0, /* is_dest_reg */
+    0, /* is_pc_relative */
+    0, /* rightshift */
+    create_MMStart_X1, /* insert */
+    get_MMStart_X1  /* extract */
+  },
+  {
+    TILE_OP_TYPE_IMMEDIATE, /* type */
+    MAYBE_BFD_RELOC(BFD_RELOC_TILE_MMEND_X1), /* default_reloc */
+    5, /* num_bits */
+    0, /* is_signed */
+    0, /* is_src_reg */
+    0, /* is_dest_reg */
+    0, /* is_pc_relative */
+    0, /* rightshift */
+    create_MMEnd_X1, /* insert */
+    get_MMEnd_X1  /* extract */
+  },
+  {
+    TILE_OP_TYPE_SPR, /* type */
+    MAYBE_BFD_RELOC(BFD_RELOC_TILE_MT_IMM15_X1), /* default_reloc */
+    15, /* num_bits */
+    0, /* is_signed */
+    0, /* is_src_reg */
+    0, /* is_dest_reg */
+    0, /* is_pc_relative */
+    0, /* rightshift */
+    create_MT_Imm15_X1, /* insert */
+    get_MT_Imm15_X1  /* extract */
+  },
+  {
+    TILE_OP_TYPE_REGISTER, /* type */
+    MAYBE_BFD_RELOC(BFD_RELOC_NONE), /* default_reloc */
+    6, /* num_bits */
+    0, /* is_signed */
+    1, /* is_src_reg */
+    1, /* is_dest_reg */
+    0, /* is_pc_relative */
+    0, /* rightshift */
+    create_Dest_Y0, /* insert */
+    get_Dest_Y0  /* extract */
+  },
+  {
+    TILE_OP_TYPE_IMMEDIATE, /* type */
+    MAYBE_BFD_RELOC(BFD_RELOC_TILE_SHAMT_X0), /* default_reloc */
+    5, /* num_bits */
+    0, /* is_signed */
+    0, /* is_src_reg */
+    0, /* is_dest_reg */
+    0, /* is_pc_relative */
+    0, /* rightshift */
+    create_ShAmt_X0, /* insert */
+    get_ShAmt_X0  /* extract */
+  },
+  {
+    TILE_OP_TYPE_IMMEDIATE, /* type */
+    MAYBE_BFD_RELOC(BFD_RELOC_TILE_SHAMT_X1), /* default_reloc */
+    5, /* num_bits */
+    0, /* is_signed */
+    0, /* is_src_reg */
+    0, /* is_dest_reg */
+    0, /* is_pc_relative */
+    0, /* rightshift */
+    create_ShAmt_X1, /* insert */
+    get_ShAmt_X1  /* extract */
+  },
+  {
+    TILE_OP_TYPE_IMMEDIATE, /* type */
+    MAYBE_BFD_RELOC(BFD_RELOC_TILE_SHAMT_Y0), /* default_reloc */
+    5, /* num_bits */
+    0, /* is_signed */
+    0, /* is_src_reg */
+    0, /* is_dest_reg */
+    0, /* is_pc_relative */
+    0, /* rightshift */
+    create_ShAmt_Y0, /* insert */
+    get_ShAmt_Y0  /* extract */
+  },
+  {
+    TILE_OP_TYPE_IMMEDIATE, /* type */
+    MAYBE_BFD_RELOC(BFD_RELOC_TILE_SHAMT_Y1), /* default_reloc */
+    5, /* num_bits */
+    0, /* is_signed */
+    0, /* is_src_reg */
+    0, /* is_dest_reg */
+    0, /* is_pc_relative */
+    0, /* rightshift */
+    create_ShAmt_Y1, /* insert */
+    get_ShAmt_Y1  /* extract */
+  },
+  {
+    TILE_OP_TYPE_REGISTER, /* type */
+    MAYBE_BFD_RELOC(BFD_RELOC_NONE), /* default_reloc */
+    6, /* num_bits */
+    0, /* is_signed */
+    1, /* is_src_reg */
+    0, /* is_dest_reg */
+    0, /* is_pc_relative */
+    0, /* rightshift */
+    create_SrcBDest_Y2, /* insert */
+    get_SrcBDest_Y2  /* extract */
+  },
+  {
+    TILE_OP_TYPE_IMMEDIATE, /* type */
+    MAYBE_BFD_RELOC(BFD_RELOC_NONE), /* default_reloc */
+    8, /* num_bits */
+    1, /* is_signed */
+    0, /* is_src_reg */
+    0, /* is_dest_reg */
+    0, /* is_pc_relative */
+    0, /* rightshift */
+    create_Dest_Imm8_X1, /* insert */
+    get_Dest_Imm8_X1  /* extract */
+  },
+  {
+    TILE_OP_TYPE_ADDRESS, /* type */
+    MAYBE_BFD_RELOC(BFD_RELOC_TILE_SN_BROFF), /* default_reloc */
+    10, /* num_bits */
+    1, /* is_signed */
+    0, /* is_src_reg */
+    0, /* is_dest_reg */
+    1, /* is_pc_relative */
+    TILE_LOG2_SN_INSTRUCTION_SIZE_IN_BYTES, /* rightshift */
+    create_BrOff_SN, /* insert */
+    get_BrOff_SN  /* extract */
+  },
+  {
+    TILE_OP_TYPE_IMMEDIATE, /* type */
+    MAYBE_BFD_RELOC(BFD_RELOC_TILE_SN_UIMM8), /* default_reloc */
+    8, /* num_bits */
+    0, /* is_signed */
+    0, /* is_src_reg */
+    0, /* is_dest_reg */
+    0, /* is_pc_relative */
+    0, /* rightshift */
+    create_Imm8_SN, /* insert */
+    get_Imm8_SN  /* extract */
+  },
+  {
+    TILE_OP_TYPE_IMMEDIATE, /* type */
+    MAYBE_BFD_RELOC(BFD_RELOC_TILE_SN_IMM8), /* default_reloc */
+    8, /* num_bits */
+    1, /* is_signed */
+    0, /* is_src_reg */
+    0, /* is_dest_reg */
+    0, /* is_pc_relative */
+    0, /* rightshift */
+    create_Imm8_SN, /* insert */
+    get_Imm8_SN  /* extract */
+  },
+  {
+    TILE_OP_TYPE_REGISTER, /* type */
+    MAYBE_BFD_RELOC(BFD_RELOC_NONE), /* default_reloc */
+    2, /* num_bits */
+    0, /* is_signed */
+    0, /* is_src_reg */
+    1, /* is_dest_reg */
+    0, /* is_pc_relative */
+    0, /* rightshift */
+    create_Dest_SN, /* insert */
+    get_Dest_SN  /* extract */
+  },
+  {
+    TILE_OP_TYPE_REGISTER, /* type */
+    MAYBE_BFD_RELOC(BFD_RELOC_NONE), /* default_reloc */
+    2, /* num_bits */
+    0, /* is_signed */
+    1, /* is_src_reg */
+    0, /* is_dest_reg */
+    0, /* is_pc_relative */
+    0, /* rightshift */
+    create_Src_SN, /* insert */
+    get_Src_SN  /* extract */
+  }
+};
+
+const struct tile_spr tile_sprs[] = {
+  { 0, "MPL_ITLB_MISS_SET_0" },
+  { 1, "MPL_ITLB_MISS_SET_1" },
+  { 2, "MPL_ITLB_MISS_SET_2" },
+  { 3, "MPL_ITLB_MISS_SET_3" },
+  { 4, "MPL_ITLB_MISS" },
+  { 256, "ITLB_CURRENT_0" },
+  { 257, "ITLB_CURRENT_1" },
+  { 258, "ITLB_CURRENT_2" },
+  { 259, "ITLB_CURRENT_3" },
+  { 260, "ITLB_INDEX" },
+  { 261, "ITLB_MATCH_0" },
+  { 262, "ITLB_PR" },
+  { 263, "NUMBER_ITLB" },
+  { 264, "REPLACEMENT_ITLB" },
+  { 265, "WIRED_ITLB" },
+  { 266, "ITLB_PERF" },
+  { 512, "MPL_MEM_ERROR_SET_0" },
+  { 513, "MPL_MEM_ERROR_SET_1" },
+  { 514, "MPL_MEM_ERROR_SET_2" },
+  { 515, "MPL_MEM_ERROR_SET_3" },
+  { 516, "MPL_MEM_ERROR" },
+  { 517, "L1_I_ERROR" },
+  { 518, "MEM_ERROR_CBOX_ADDR" },
+  { 519, "MEM_ERROR_CBOX_STATUS" },
+  { 520, "MEM_ERROR_ENABLE" },
+  { 521, "MEM_ERROR_MBOX_ADDR" },
+  { 522, "MEM_ERROR_MBOX_STATUS" },
+  { 523, "SNIC_ERROR_LOG_STATUS" },
+  { 524, "SNIC_ERROR_LOG_VA" },
+  { 525, "XDN_DEMUX_ERROR" },
+  { 1024, "MPL_ILL_SET_0" },
+  { 1025, "MPL_ILL_SET_1" },
+  { 1026, "MPL_ILL_SET_2" },
+  { 1027, "MPL_ILL_SET_3" },
+  { 1028, "MPL_ILL" },
+  { 1536, "MPL_GPV_SET_0" },
+  { 1537, "MPL_GPV_SET_1" },
+  { 1538, "MPL_GPV_SET_2" },
+  { 1539, "MPL_GPV_SET_3" },
+  { 1540, "MPL_GPV" },
+  { 1541, "GPV_REASON" },
+  { 2048, "MPL_SN_ACCESS_SET_0" },
+  { 2049, "MPL_SN_ACCESS_SET_1" },
+  { 2050, "MPL_SN_ACCESS_SET_2" },
+  { 2051, "MPL_SN_ACCESS_SET_3" },
+  { 2052, "MPL_SN_ACCESS" },
+  { 2053, "SNCTL" },
+  { 2054, "SNFIFO_DATA" },
+  { 2055, "SNFIFO_SEL" },
+  { 2056, "SNIC_INVADDR" },
+  { 2057, "SNISTATE" },
+  { 2058, "SNOSTATE" },
+  { 2059, "SNPC" },
+  { 2060, "SNSTATIC" },
+  { 2304, "SN_DATA_AVAIL" },
+  { 2560, "MPL_IDN_ACCESS_SET_0" },
+  { 2561, "MPL_IDN_ACCESS_SET_1" },
+  { 2562, "MPL_IDN_ACCESS_SET_2" },
+  { 2563, "MPL_IDN_ACCESS_SET_3" },
+  { 2564, "MPL_IDN_ACCESS" },
+  { 2565, "IDN_DEMUX_CA_COUNT" },
+  { 2566, "IDN_DEMUX_COUNT_0" },
+  { 2567, "IDN_DEMUX_COUNT_1" },
+  { 2568, "IDN_DEMUX_CTL" },
+  { 2569, "IDN_DEMUX_CURR_TAG" },
+  { 2570, "IDN_DEMUX_QUEUE_SEL" },
+  { 2571, "IDN_DEMUX_STATUS" },
+  { 2572, "IDN_DEMUX_WRITE_FIFO" },
+  { 2573, "IDN_DEMUX_WRITE_QUEUE" },
+  { 2574, "IDN_PENDING" },
+  { 2575, "IDN_SP_FIFO_DATA" },
+  { 2576, "IDN_SP_FIFO_SEL" },
+  { 2577, "IDN_SP_FREEZE" },
+  { 2578, "IDN_SP_STATE" },
+  { 2579, "IDN_TAG_0" },
+  { 2580, "IDN_TAG_1" },
+  { 2581, "IDN_TAG_VALID" },
+  { 2582, "IDN_TILE_COORD" },
+  { 2816, "IDN_CA_DATA" },
+  { 2817, "IDN_CA_REM" },
+  { 2818, "IDN_CA_TAG" },
+  { 2819, "IDN_DATA_AVAIL" },
+  { 3072, "MPL_UDN_ACCESS_SET_0" },
+  { 3073, "MPL_UDN_ACCESS_SET_1" },
+  { 3074, "MPL_UDN_ACCESS_SET_2" },
+  { 3075, "MPL_UDN_ACCESS_SET_3" },
+  { 3076, "MPL_UDN_ACCESS" },
+  { 3077, "UDN_DEMUX_CA_COUNT" },
+  { 3078, "UDN_DEMUX_COUNT_0" },
+  { 3079, "UDN_DEMUX_COUNT_1" },
+  { 3080, "UDN_DEMUX_COUNT_2" },
+  { 3081, "UDN_DEMUX_COUNT_3" },
+  { 3082, "UDN_DEMUX_CTL" },
+  { 3083, "UDN_DEMUX_CURR_TAG" },
+  { 3084, "UDN_DEMUX_QUEUE_SEL" },
+  { 3085, "UDN_DEMUX_STATUS" },
+  { 3086, "UDN_DEMUX_WRITE_FIFO" },
+  { 3087, "UDN_DEMUX_WRITE_QUEUE" },
+  { 3088, "UDN_PENDING" },
+  { 3089, "UDN_SP_FIFO_DATA" },
+  { 3090, "UDN_SP_FIFO_SEL" },
+  { 3091, "UDN_SP_FREEZE" },
+  { 3092, "UDN_SP_STATE" },
+  { 3093, "UDN_TAG_0" },
+  { 3094, "UDN_TAG_1" },
+  { 3095, "UDN_TAG_2" },
+  { 3096, "UDN_TAG_3" },
+  { 3097, "UDN_TAG_VALID" },
+  { 3098, "UDN_TILE_COORD" },
+  { 3328, "UDN_CA_DATA" },
+  { 3329, "UDN_CA_REM" },
+  { 3330, "UDN_CA_TAG" },
+  { 3331, "UDN_DATA_AVAIL" },
+  { 3584, "MPL_IDN_REFILL_SET_0" },
+  { 3585, "MPL_IDN_REFILL_SET_1" },
+  { 3586, "MPL_IDN_REFILL_SET_2" },
+  { 3587, "MPL_IDN_REFILL_SET_3" },
+  { 3588, "MPL_IDN_REFILL" },
+  { 3589, "IDN_REFILL_EN" },
+  { 4096, "MPL_UDN_REFILL_SET_0" },
+  { 4097, "MPL_UDN_REFILL_SET_1" },
+  { 4098, "MPL_UDN_REFILL_SET_2" },
+  { 4099, "MPL_UDN_REFILL_SET_3" },
+  { 4100, "MPL_UDN_REFILL" },
+  { 4101, "UDN_REFILL_EN" },
+  { 4608, "MPL_IDN_COMPLETE_SET_0" },
+  { 4609, "MPL_IDN_COMPLETE_SET_1" },
+  { 4610, "MPL_IDN_COMPLETE_SET_2" },
+  { 4611, "MPL_IDN_COMPLETE_SET_3" },
+  { 4612, "MPL_IDN_COMPLETE" },
+  { 4613, "IDN_REMAINING" },
+  { 5120, "MPL_UDN_COMPLETE_SET_0" },
+  { 5121, "MPL_UDN_COMPLETE_SET_1" },
+  { 5122, "MPL_UDN_COMPLETE_SET_2" },
+  { 5123, "MPL_UDN_COMPLETE_SET_3" },
+  { 5124, "MPL_UDN_COMPLETE" },
+  { 5125, "UDN_REMAINING" },
+  { 5632, "MPL_SWINT_3_SET_0" },
+  { 5633, "MPL_SWINT_3_SET_1" },
+  { 5634, "MPL_SWINT_3_SET_2" },
+  { 5635, "MPL_SWINT_3_SET_3" },
+  { 5636, "MPL_SWINT_3" },
+  { 6144, "MPL_SWINT_2_SET_0" },
+  { 6145, "MPL_SWINT_2_SET_1" },
+  { 6146, "MPL_SWINT_2_SET_2" },
+  { 6147, "MPL_SWINT_2_SET_3" },
+  { 6148, "MPL_SWINT_2" },
+  { 6656, "MPL_SWINT_1_SET_0" },
+  { 6657, "MPL_SWINT_1_SET_1" },
+  { 6658, "MPL_SWINT_1_SET_2" },
+  { 6659, "MPL_SWINT_1_SET_3" },
+  { 6660, "MPL_SWINT_1" },
+  { 7168, "MPL_SWINT_0_SET_0" },
+  { 7169, "MPL_SWINT_0_SET_1" },
+  { 7170, "MPL_SWINT_0_SET_2" },
+  { 7171, "MPL_SWINT_0_SET_3" },
+  { 7172, "MPL_SWINT_0" },
+  { 7680, "MPL_UNALIGN_DATA_SET_0" },
+  { 7681, "MPL_UNALIGN_DATA_SET_1" },
+  { 7682, "MPL_UNALIGN_DATA_SET_2" },
+  { 7683, "MPL_UNALIGN_DATA_SET_3" },
+  { 7684, "MPL_UNALIGN_DATA" },
+  { 8192, "MPL_DTLB_MISS_SET_0" },
+  { 8193, "MPL_DTLB_MISS_SET_1" },
+  { 8194, "MPL_DTLB_MISS_SET_2" },
+  { 8195, "MPL_DTLB_MISS_SET_3" },
+  { 8196, "MPL_DTLB_MISS" },
+  { 8448, "AER_0" },
+  { 8449, "AER_1" },
+  { 8450, "DTLB_BAD_ADDR" },
+  { 8451, "DTLB_BAD_ADDR_REASON" },
+  { 8452, "DTLB_CURRENT_0" },
+  { 8453, "DTLB_CURRENT_1" },
+  { 8454, "DTLB_CURRENT_2" },
+  { 8455, "DTLB_CURRENT_3" },
+  { 8456, "DTLB_INDEX" },
+  { 8457, "DTLB_MATCH_0" },
+  { 8458, "NUMBER_DTLB" },
+  { 8459, "PHYSICAL_MEMORY_MODE" },
+  { 8460, "REPLACEMENT_DTLB" },
+  { 8461, "WIRED_DTLB" },
+  { 8462, "CACHE_RED_WAY_OVERRIDDEN" },
+  { 8463, "DTLB_PERF" },
+  { 8704, "MPL_DTLB_ACCESS_SET_0" },
+  { 8705, "MPL_DTLB_ACCESS_SET_1" },
+  { 8706, "MPL_DTLB_ACCESS_SET_2" },
+  { 8707, "MPL_DTLB_ACCESS_SET_3" },
+  { 8708, "MPL_DTLB_ACCESS" },
+  { 9216, "MPL_DMATLB_MISS_SET_0" },
+  { 9217, "MPL_DMATLB_MISS_SET_1" },
+  { 9218, "MPL_DMATLB_MISS_SET_2" },
+  { 9219, "MPL_DMATLB_MISS_SET_3" },
+  { 9220, "MPL_DMATLB_MISS" },
+  { 9472, "DMA_BAD_ADDR" },
+  { 9473, "DMA_STATUS" },
+  { 9728, "MPL_DMATLB_ACCESS_SET_0" },
+  { 9729, "MPL_DMATLB_ACCESS_SET_1" },
+  { 9730, "MPL_DMATLB_ACCESS_SET_2" },
+  { 9731, "MPL_DMATLB_ACCESS_SET_3" },
+  { 9732, "MPL_DMATLB_ACCESS" },
+  { 10240, "MPL_SNITLB_MISS_SET_0" },
+  { 10241, "MPL_SNITLB_MISS_SET_1" },
+  { 10242, "MPL_SNITLB_MISS_SET_2" },
+  { 10243, "MPL_SNITLB_MISS_SET_3" },
+  { 10244, "MPL_SNITLB_MISS" },
+  { 10245, "NUMBER_SNITLB" },
+  { 10246, "REPLACEMENT_SNITLB" },
+  { 10247, "SNITLB_CURRENT_0" },
+  { 10248, "SNITLB_CURRENT_1" },
+  { 10249, "SNITLB_CURRENT_2" },
+  { 10250, "SNITLB_CURRENT_3" },
+  { 10251, "SNITLB_INDEX" },
+  { 10252, "SNITLB_MATCH_0" },
+  { 10253, "SNITLB_PR" },
+  { 10254, "WIRED_SNITLB" },
+  { 10255, "SNITLB_STATUS" },
+  { 10752, "MPL_SN_NOTIFY_SET_0" },
+  { 10753, "MPL_SN_NOTIFY_SET_1" },
+  { 10754, "MPL_SN_NOTIFY_SET_2" },
+  { 10755, "MPL_SN_NOTIFY_SET_3" },
+  { 10756, "MPL_SN_NOTIFY" },
+  { 10757, "SN_NOTIFY_STATUS" },
+  { 11264, "MPL_SN_FIREWALL_SET_0" },
+  { 11265, "MPL_SN_FIREWALL_SET_1" },
+  { 11266, "MPL_SN_FIREWALL_SET_2" },
+  { 11267, "MPL_SN_FIREWALL_SET_3" },
+  { 11268, "MPL_SN_FIREWALL" },
+  { 11269, "SN_DIRECTION_PROTECT" },
+  { 11776, "MPL_IDN_FIREWALL_SET_0" },
+  { 11777, "MPL_IDN_FIREWALL_SET_1" },
+  { 11778, "MPL_IDN_FIREWALL_SET_2" },
+  { 11779, "MPL_IDN_FIREWALL_SET_3" },
+  { 11780, "MPL_IDN_FIREWALL" },
+  { 11781, "IDN_DIRECTION_PROTECT" },
+  { 12288, "MPL_UDN_FIREWALL_SET_0" },
+  { 12289, "MPL_UDN_FIREWALL_SET_1" },
+  { 12290, "MPL_UDN_FIREWALL_SET_2" },
+  { 12291, "MPL_UDN_FIREWALL_SET_3" },
+  { 12292, "MPL_UDN_FIREWALL" },
+  { 12293, "UDN_DIRECTION_PROTECT" },
+  { 12800, "MPL_TILE_TIMER_SET_0" },
+  { 12801, "MPL_TILE_TIMER_SET_1" },
+  { 12802, "MPL_TILE_TIMER_SET_2" },
+  { 12803, "MPL_TILE_TIMER_SET_3" },
+  { 12804, "MPL_TILE_TIMER" },
+  { 12805, "TILE_TIMER_CONTROL" },
+  { 13312, "MPL_IDN_TIMER_SET_0" },
+  { 13313, "MPL_IDN_TIMER_SET_1" },
+  { 13314, "MPL_IDN_TIMER_SET_2" },
+  { 13315, "MPL_IDN_TIMER_SET_3" },
+  { 13316, "MPL_IDN_TIMER" },
+  { 13317, "IDN_DEADLOCK_COUNT" },
+  { 13318, "IDN_DEADLOCK_TIMEOUT" },
+  { 13824, "MPL_UDN_TIMER_SET_0" },
+  { 13825, "MPL_UDN_TIMER_SET_1" },
+  { 13826, "MPL_UDN_TIMER_SET_2" },
+  { 13827, "MPL_UDN_TIMER_SET_3" },
+  { 13828, "MPL_UDN_TIMER" },
+  { 13829, "UDN_DEADLOCK_COUNT" },
+  { 13830, "UDN_DEADLOCK_TIMEOUT" },
+  { 14336, "MPL_DMA_NOTIFY_SET_0" },
+  { 14337, "MPL_DMA_NOTIFY_SET_1" },
+  { 14338, "MPL_DMA_NOTIFY_SET_2" },
+  { 14339, "MPL_DMA_NOTIFY_SET_3" },
+  { 14340, "MPL_DMA_NOTIFY" },
+  { 14592, "DMA_BYTE" },
+  { 14593, "DMA_CHUNK_SIZE" },
+  { 14594, "DMA_CTR" },
+  { 14595, "DMA_DST_ADDR" },
+  { 14596, "DMA_DST_CHUNK_ADDR" },
+  { 14597, "DMA_SRC_ADDR" },
+  { 14598, "DMA_SRC_CHUNK_ADDR" },
+  { 14599, "DMA_STRIDE" },
+  { 14600, "DMA_USER_STATUS" },
+  { 14848, "MPL_IDN_CA_SET_0" },
+  { 14849, "MPL_IDN_CA_SET_1" },
+  { 14850, "MPL_IDN_CA_SET_2" },
+  { 14851, "MPL_IDN_CA_SET_3" },
+  { 14852, "MPL_IDN_CA" },
+  { 15360, "MPL_UDN_CA_SET_0" },
+  { 15361, "MPL_UDN_CA_SET_1" },
+  { 15362, "MPL_UDN_CA_SET_2" },
+  { 15363, "MPL_UDN_CA_SET_3" },
+  { 15364, "MPL_UDN_CA" },
+  { 15872, "MPL_IDN_AVAIL_SET_0" },
+  { 15873, "MPL_IDN_AVAIL_SET_1" },
+  { 15874, "MPL_IDN_AVAIL_SET_2" },
+  { 15875, "MPL_IDN_AVAIL_SET_3" },
+  { 15876, "MPL_IDN_AVAIL" },
+  { 15877, "IDN_AVAIL_EN" },
+  { 16384, "MPL_UDN_AVAIL_SET_0" },
+  { 16385, "MPL_UDN_AVAIL_SET_1" },
+  { 16386, "MPL_UDN_AVAIL_SET_2" },
+  { 16387, "MPL_UDN_AVAIL_SET_3" },
+  { 16388, "MPL_UDN_AVAIL" },
+  { 16389, "UDN_AVAIL_EN" },
+  { 16896, "MPL_PERF_COUNT_SET_0" },
+  { 16897, "MPL_PERF_COUNT_SET_1" },
+  { 16898, "MPL_PERF_COUNT_SET_2" },
+  { 16899, "MPL_PERF_COUNT_SET_3" },
+  { 16900, "MPL_PERF_COUNT" },
+  { 16901, "PERF_COUNT_0" },
+  { 16902, "PERF_COUNT_1" },
+  { 16903, "PERF_COUNT_CTL" },
+  { 16904, "PERF_COUNT_STS" },
+  { 16905, "WATCH_CTL" },
+  { 16906, "WATCH_MASK" },
+  { 16907, "WATCH_VAL" },
+  { 16912, "PERF_COUNT_DN_CTL" },
+  { 17408, "MPL_INTCTRL_3_SET_0" },
+  { 17409, "MPL_INTCTRL_3_SET_1" },
+  { 17410, "MPL_INTCTRL_3_SET_2" },
+  { 17411, "MPL_INTCTRL_3_SET_3" },
+  { 17412, "MPL_INTCTRL_3" },
+  { 17413, "EX_CONTEXT_3_0" },
+  { 17414, "EX_CONTEXT_3_1" },
+  { 17415, "INTERRUPT_MASK_3_0" },
+  { 17416, "INTERRUPT_MASK_3_1" },
+  { 17417, "INTERRUPT_MASK_RESET_3_0" },
+  { 17418, "INTERRUPT_MASK_RESET_3_1" },
+  { 17419, "INTERRUPT_MASK_SET_3_0" },
+  { 17420, "INTERRUPT_MASK_SET_3_1" },
+  { 17432, "INTCTRL_3_STATUS" },
+  { 17664, "SYSTEM_SAVE_3_0" },
+  { 17665, "SYSTEM_SAVE_3_1" },
+  { 17666, "SYSTEM_SAVE_3_2" },
+  { 17667, "SYSTEM_SAVE_3_3" },
+  { 17920, "MPL_INTCTRL_2_SET_0" },
+  { 17921, "MPL_INTCTRL_2_SET_1" },
+  { 17922, "MPL_INTCTRL_2_SET_2" },
+  { 17923, "MPL_INTCTRL_2_SET_3" },
+  { 17924, "MPL_INTCTRL_2" },
+  { 17925, "EX_CONTEXT_2_0" },
+  { 17926, "EX_CONTEXT_2_1" },
+  { 17927, "INTCTRL_2_STATUS" },
+  { 17928, "INTERRUPT_MASK_2_0" },
+  { 17929, "INTERRUPT_MASK_2_1" },
+  { 17930, "INTERRUPT_MASK_RESET_2_0" },
+  { 17931, "INTERRUPT_MASK_RESET_2_1" },
+  { 17932, "INTERRUPT_MASK_SET_2_0" },
+  { 17933, "INTERRUPT_MASK_SET_2_1" },
+  { 18176, "SYSTEM_SAVE_2_0" },
+  { 18177, "SYSTEM_SAVE_2_1" },
+  { 18178, "SYSTEM_SAVE_2_2" },
+  { 18179, "SYSTEM_SAVE_2_3" },
+  { 18432, "MPL_INTCTRL_1_SET_0" },
+  { 18433, "MPL_INTCTRL_1_SET_1" },
+  { 18434, "MPL_INTCTRL_1_SET_2" },
+  { 18435, "MPL_INTCTRL_1_SET_3" },
+  { 18436, "MPL_INTCTRL_1" },
+  { 18437, "EX_CONTEXT_1_0" },
+  { 18438, "EX_CONTEXT_1_1" },
+  { 18439, "INTCTRL_1_STATUS" },
+  { 18440, "INTCTRL_3_STATUS_REV0" },
+  { 18441, "INTERRUPT_MASK_1_0" },
+  { 18442, "INTERRUPT_MASK_1_1" },
+  { 18443, "INTERRUPT_MASK_RESET_1_0" },
+  { 18444, "INTERRUPT_MASK_RESET_1_1" },
+  { 18445, "INTERRUPT_MASK_SET_1_0" },
+  { 18446, "INTERRUPT_MASK_SET_1_1" },
+  { 18688, "SYSTEM_SAVE_1_0" },
+  { 18689, "SYSTEM_SAVE_1_1" },
+  { 18690, "SYSTEM_SAVE_1_2" },
+  { 18691, "SYSTEM_SAVE_1_3" },
+  { 18944, "MPL_INTCTRL_0_SET_0" },
+  { 18945, "MPL_INTCTRL_0_SET_1" },
+  { 18946, "MPL_INTCTRL_0_SET_2" },
+  { 18947, "MPL_INTCTRL_0_SET_3" },
+  { 18948, "MPL_INTCTRL_0" },
+  { 18949, "EX_CONTEXT_0_0" },
+  { 18950, "EX_CONTEXT_0_1" },
+  { 18951, "INTCTRL_0_STATUS" },
+  { 18952, "INTERRUPT_MASK_0_0" },
+  { 18953, "INTERRUPT_MASK_0_1" },
+  { 18954, "INTERRUPT_MASK_RESET_0_0" },
+  { 18955, "INTERRUPT_MASK_RESET_0_1" },
+  { 18956, "INTERRUPT_MASK_SET_0_0" },
+  { 18957, "INTERRUPT_MASK_SET_0_1" },
+  { 19200, "SYSTEM_SAVE_0_0" },
+  { 19201, "SYSTEM_SAVE_0_1" },
+  { 19202, "SYSTEM_SAVE_0_2" },
+  { 19203, "SYSTEM_SAVE_0_3" },
+  { 19456, "MPL_BOOT_ACCESS_SET_0" },
+  { 19457, "MPL_BOOT_ACCESS_SET_1" },
+  { 19458, "MPL_BOOT_ACCESS_SET_2" },
+  { 19459, "MPL_BOOT_ACCESS_SET_3" },
+  { 19460, "MPL_BOOT_ACCESS" },
+  { 19461, "CBOX_CACHEASRAM_CONFIG" },
+  { 19462, "CBOX_CACHE_CONFIG" },
+  { 19463, "CBOX_MMAP_0" },
+  { 19464, "CBOX_MMAP_1" },
+  { 19465, "CBOX_MMAP_2" },
+  { 19466, "CBOX_MMAP_3" },
+  { 19467, "CBOX_MSR" },
+  { 19468, "CBOX_SRC_ID" },
+  { 19469, "CYCLE_HIGH_MODIFY" },
+  { 19470, "CYCLE_LOW_MODIFY" },
+  { 19471, "DIAG_BCST_CTL" },
+  { 19472, "DIAG_BCST_MASK" },
+  { 19473, "DIAG_BCST_TRIGGER" },
+  { 19474, "DIAG_MUX_CTL" },
+  { 19475, "DIAG_TRACE_CTL" },
+  { 19476, "DIAG_TRACE_STS" },
+  { 19477, "IDN_DEMUX_BUF_THRESH" },
+  { 19478, "SBOX_CONFIG" },
+  { 19479, "TILE_COORD" },
+  { 19480, "UDN_DEMUX_BUF_THRESH" },
+  { 19481, "CBOX_HOME_MAP_ADDR" },
+  { 19482, "CBOX_HOME_MAP_DATA" },
+  { 19483, "CBOX_MSR1" },
+  { 19484, "BIG_ENDIAN_CONFIG" },
+  { 19485, "MEM_STRIPE_CONFIG" },
+  { 19486, "DIAG_TRACE_WAY" },
+  { 19487, "VDN_SNOOP_SHIM_CTL" },
+  { 19488, "PERF_COUNT_PLS" },
+  { 19489, "DIAG_TRACE_DATA" },
+  { 19712, "I_AER_0" },
+  { 19713, "I_AER_1" },
+  { 19714, "I_PHYSICAL_MEMORY_MODE" },
+  { 19968, "MPL_WORLD_ACCESS_SET_0" },
+  { 19969, "MPL_WORLD_ACCESS_SET_1" },
+  { 19970, "MPL_WORLD_ACCESS_SET_2" },
+  { 19971, "MPL_WORLD_ACCESS_SET_3" },
+  { 19972, "MPL_WORLD_ACCESS" },
+  { 19973, "SIM_SOCKET" },
+  { 19974, "CYCLE_HIGH" },
+  { 19975, "CYCLE_LOW" },
+  { 19976, "DONE" },
+  { 19977, "FAIL" },
+  { 19978, "INTERRUPT_CRITICAL_SECTION" },
+  { 19979, "PASS" },
+  { 19980, "SIM_CONTROL" },
+  { 19981, "EVENT_BEGIN" },
+  { 19982, "EVENT_END" },
+  { 19983, "TILE_WRITE_PENDING" },
+  { 19984, "TILE_RTF_HWM" },
+  { 20224, "PROC_STATUS" },
+  { 20225, "STATUS_SATURATE" },
+  { 20480, "MPL_I_ASID_SET_0" },
+  { 20481, "MPL_I_ASID_SET_1" },
+  { 20482, "MPL_I_ASID_SET_2" },
+  { 20483, "MPL_I_ASID_SET_3" },
+  { 20484, "MPL_I_ASID" },
+  { 20485, "I_ASID" },
+  { 20992, "MPL_D_ASID_SET_0" },
+  { 20993, "MPL_D_ASID_SET_1" },
+  { 20994, "MPL_D_ASID_SET_2" },
+  { 20995, "MPL_D_ASID_SET_3" },
+  { 20996, "MPL_D_ASID" },
+  { 20997, "D_ASID" },
+  { 21504, "MPL_DMA_ASID_SET_0" },
+  { 21505, "MPL_DMA_ASID_SET_1" },
+  { 21506, "MPL_DMA_ASID_SET_2" },
+  { 21507, "MPL_DMA_ASID_SET_3" },
+  { 21508, "MPL_DMA_ASID" },
+  { 21509, "DMA_ASID" },
+  { 22016, "MPL_SNI_ASID_SET_0" },
+  { 22017, "MPL_SNI_ASID_SET_1" },
+  { 22018, "MPL_SNI_ASID_SET_2" },
+  { 22019, "MPL_SNI_ASID_SET_3" },
+  { 22020, "MPL_SNI_ASID" },
+  { 22021, "SNI_ASID" },
+  { 22528, "MPL_DMA_CPL_SET_0" },
+  { 22529, "MPL_DMA_CPL_SET_1" },
+  { 22530, "MPL_DMA_CPL_SET_2" },
+  { 22531, "MPL_DMA_CPL_SET_3" },
+  { 22532, "MPL_DMA_CPL" },
+  { 23040, "MPL_SN_CPL_SET_0" },
+  { 23041, "MPL_SN_CPL_SET_1" },
+  { 23042, "MPL_SN_CPL_SET_2" },
+  { 23043, "MPL_SN_CPL_SET_3" },
+  { 23044, "MPL_SN_CPL" },
+  { 23552, "MPL_DOUBLE_FAULT_SET_0" },
+  { 23553, "MPL_DOUBLE_FAULT_SET_1" },
+  { 23554, "MPL_DOUBLE_FAULT_SET_2" },
+  { 23555, "MPL_DOUBLE_FAULT_SET_3" },
+  { 23556, "MPL_DOUBLE_FAULT" },
+  { 23557, "LAST_INTERRUPT_REASON" },
+  { 24064, "MPL_SN_STATIC_ACCESS_SET_0" },
+  { 24065, "MPL_SN_STATIC_ACCESS_SET_1" },
+  { 24066, "MPL_SN_STATIC_ACCESS_SET_2" },
+  { 24067, "MPL_SN_STATIC_ACCESS_SET_3" },
+  { 24068, "MPL_SN_STATIC_ACCESS" },
+  { 24069, "SN_STATIC_CTL" },
+  { 24070, "SN_STATIC_FIFO_DATA" },
+  { 24071, "SN_STATIC_FIFO_SEL" },
+  { 24073, "SN_STATIC_ISTATE" },
+  { 24074, "SN_STATIC_OSTATE" },
+  { 24076, "SN_STATIC_STATIC" },
+  { 24320, "SN_STATIC_DATA_AVAIL" },
+  { 24576, "MPL_AUX_PERF_COUNT_SET_0" },
+  { 24577, "MPL_AUX_PERF_COUNT_SET_1" },
+  { 24578, "MPL_AUX_PERF_COUNT_SET_2" },
+  { 24579, "MPL_AUX_PERF_COUNT_SET_3" },
+  { 24580, "MPL_AUX_PERF_COUNT" },
+  { 24581, "AUX_PERF_COUNT_0" },
+  { 24582, "AUX_PERF_COUNT_1" },
+  { 24583, "AUX_PERF_COUNT_CTL" },
+  { 24584, "AUX_PERF_COUNT_STS" },
+};
+
+const int tile_num_sprs = 499;
+
+
+
+
+/* Canonical name of each register. */
+const char *const tile_register_names[] =
+{
+  "r0",   "r1",  "r2",  "r3",  "r4",  "r5",  "r6",  "r7",
+  "r8",   "r9",  "r10", "r11", "r12", "r13", "r14", "r15",
+  "r16",  "r17", "r18", "r19", "r20", "r21", "r22", "r23",
+  "r24",  "r25", "r26", "r27", "r28", "r29", "r30", "r31",
+  "r32",  "r33", "r34", "r35", "r36", "r37", "r38", "r39",
+  "r40",  "r41", "r42", "r43", "r44", "r45", "r46", "r47",
+  "r48",  "r49", "r50", "r51", "r52", "tp",  "sp",  "lr",
+  "sn",  "idn0", "idn1", "udn0", "udn1", "udn2", "udn3", "zero"
+};
+
+
+/* Given a set of bundle bits and the lookup FSM for a specific pipe,
+ * returns which instruction the bundle contains in that pipe.
+ */
+static const struct tile_opcode *
+find_opcode(tile_bundle_bits bits, const unsigned short *table)
+{
+  int index = 0;
+
+  while (1)
+  {
+    unsigned short bitspec = table[index];
+    unsigned int bitfield =
+      ((unsigned int)(bits >> (bitspec & 63))) & (bitspec >> 6);
+
+    unsigned short next = table[index + 1 + bitfield];
+    if (next <= TILE_OPC_NONE)
+      return &tile_opcodes[next];
+
+    index = next - TILE_OPC_NONE;
+  }
+}
+
+
+int
+parse_insn_tile(tile_bundle_bits bits,
+                unsigned int pc,
+                struct tile_decoded_instruction
+                decoded[TILE_MAX_INSTRUCTIONS_PER_BUNDLE])
+{
+  int num_instructions = 0;
+  int pipe;
+
+  int min_pipe, max_pipe;
+  if ((bits & TILE_BUNDLE_Y_ENCODING_MASK) == 0)
+  {
+    min_pipe = TILE_PIPELINE_X0;
+    max_pipe = TILE_PIPELINE_X1;
+  }
+  else
+  {
+    min_pipe = TILE_PIPELINE_Y0;
+    max_pipe = TILE_PIPELINE_Y2;
+  }
+
+  /* For each pipe, find an instruction that fits. */
+  for (pipe = min_pipe; pipe <= max_pipe; pipe++)
+  {
+    const struct tile_opcode *opc;
+    struct tile_decoded_instruction *d;
+    int i;
+
+    d = &decoded[num_instructions++];
+    opc = find_opcode (bits, tile_bundle_decoder_fsms[pipe]);
+    d->opcode = opc;
+
+    /* Decode each operand, sign extending, etc. as appropriate. */
+    for (i = 0; i < opc->num_operands; i++)
+    {
+      const struct tile_operand *op =
+        &tile_operands[opc->operands[pipe][i]];
+      int opval = op->extract (bits);
+      if (op->is_signed)
+      {
+        /* Sign-extend the operand. */
+        int shift = (int)((sizeof(int) * 8) - op->num_bits);
+        opval = (opval << shift) >> shift;
+      }
+
+      /* Adjust PC-relative scaled branch offsets. */
+      if (op->type == TILE_OP_TYPE_ADDRESS)
+      {
+        opval *= TILE_BUNDLE_SIZE_IN_BYTES;
+        opval += (int)pc;
+      }
+
+      /* Record the final value. */
+      d->operands[i] = op;
+      d->operand_values[i] = opval;
+    }
+  }
+
+  return num_instructions;
+}
diff --git a/arch/tile/kernel/time.c b/arch/tile/kernel/time.c
new file mode 100644
index 0000000..47500a3
--- /dev/null
+++ b/arch/tile/kernel/time.c
@@ -0,0 +1,220 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ * Support the cycle counter clocksource and tile timer clock event device.
+ */
+
+#include <linux/time.h>
+#include <linux/timex.h>
+#include <linux/clocksource.h>
+#include <linux/clockchips.h>
+#include <linux/hardirq.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/delay.h>
+#include <asm/irq_regs.h>
+#include <hv/hypervisor.h>
+#include <arch/interrupts.h>
+#include <arch/spr_def.h>
+
+
+/*
+ * Define the cycle counter clock source.
+ */
+
+/* How many cycles per second we are running at. */
+static cycles_t cycles_per_sec __write_once;
+
+/*
+ * We set up shift and multiply values with a minsec of five seconds,
+ * since our timer counter counts down 31 bits at a frequency of
+ * no less than 500 MHz.  See @minsec for clocks_calc_mult_shift().
+ * We could use a different value for the 64-bit free-running
+ * cycle counter, but we use the same one for consistency, and since
+ * we will be reasonably precise with this value anyway.
+ */
+#define TILE_MINSEC 5
+
+cycles_t get_clock_rate()
+{
+	return cycles_per_sec;
+}
+
+#if CHIP_HAS_SPLIT_CYCLE()
+cycles_t get_cycles()
+{
+	unsigned int high = __insn_mfspr(SPR_CYCLE_HIGH);
+	unsigned int low = __insn_mfspr(SPR_CYCLE_LOW);
+	unsigned int high2 = __insn_mfspr(SPR_CYCLE_HIGH);
+
+	while (unlikely(high != high2)) {
+		low = __insn_mfspr(SPR_CYCLE_LOW);
+		high = high2;
+		high2 = __insn_mfspr(SPR_CYCLE_HIGH);
+	}
+
+	return (((cycles_t)high) << 32) | low;
+}
+#endif
+
+cycles_t clocksource_get_cycles(struct clocksource *cs)
+{
+	return get_cycles();
+}
+
+static struct clocksource cycle_counter_cs = {
+	.name = "cycle counter",
+	.rating = 300,
+	.read = clocksource_get_cycles,
+	.mask = CLOCKSOURCE_MASK(64),
+	.flags = CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+/*
+ * Called very early from setup_arch() to set cycles_per_sec.
+ * We initialize it early so we can use it to set up loops_per_jiffy.
+ */
+void __init setup_clock(void)
+{
+	cycles_per_sec = hv_sysconf(HV_SYSCONF_CPU_SPEED);
+	clocksource_calc_mult_shift(&cycle_counter_cs, cycles_per_sec,
+				    TILE_MINSEC);
+}
+
+void __init calibrate_delay(void)
+{
+	loops_per_jiffy = get_clock_rate() / HZ;
+	pr_info("Clock rate yields %lu.%02lu BogoMIPS (lpj=%lu)\n",
+		loops_per_jiffy/(500000/HZ),
+		(loops_per_jiffy/(5000/HZ)) % 100, loops_per_jiffy);
+}
+
+/* Called fairly late in init/main.c, but before we go smp. */
+void __init time_init(void)
+{
+	/* Initialize and register the clock source. */
+	clocksource_register(&cycle_counter_cs);
+
+	/* Start up the tile-timer interrupt source on the boot cpu. */
+	setup_tile_timer();
+}
+
+
+/*
+ * Define the tile timer clock event device.  The timer is driven by
+ * the TILE_TIMER_CONTROL register, which consists of a 31-bit down
+ * counter, plus bit 31, which signifies that the counter has wrapped
+ * from zero to (2**31) - 1.  The INT_TILE_TIMER interrupt will be
+ * raised as long as bit 31 is set.
+ */
+
+#define MAX_TICK 0x7fffffff   /* we have 31 bits of countdown timer */
+
+static int tile_timer_set_next_event(unsigned long ticks,
+				     struct clock_event_device *evt)
+{
+	BUG_ON(ticks > MAX_TICK);
+	__insn_mtspr(SPR_TILE_TIMER_CONTROL, ticks);
+	raw_local_irq_unmask_now(INT_TILE_TIMER);
+	return 0;
+}
+
+/*
+ * Whenever anyone tries to change modes, we just mask interrupts
+ * and wait for the next event to get set.
+ */
+static void tile_timer_set_mode(enum clock_event_mode mode,
+				struct clock_event_device *evt)
+{
+	raw_local_irq_mask_now(INT_TILE_TIMER);
+}
+
+/*
+ * Set min_delta_ns to 1 microsecond, since it takes about
+ * that long to fire the interrupt.
+ */
+static DEFINE_PER_CPU(struct clock_event_device, tile_timer) = {
+	.name = "tile timer",
+	.features = CLOCK_EVT_FEAT_ONESHOT,
+	.min_delta_ns = 1000,
+	.rating = 100,
+	.irq = -1,
+	.set_next_event = tile_timer_set_next_event,
+	.set_mode = tile_timer_set_mode,
+};
+
+void __cpuinit setup_tile_timer(void)
+{
+	struct clock_event_device *evt = &__get_cpu_var(tile_timer);
+
+	/* Fill in fields that are speed-specific. */
+	clockevents_calc_mult_shift(evt, cycles_per_sec, TILE_MINSEC);
+	evt->max_delta_ns = clockevent_delta2ns(MAX_TICK, evt);
+
+	/* Mark as being for this cpu only. */
+	evt->cpumask = cpumask_of(smp_processor_id());
+
+	/* Start out with timer not firing. */
+	raw_local_irq_mask_now(INT_TILE_TIMER);
+
+	/* Register tile timer. */
+	clockevents_register_device(evt);
+}
+
+/* Called from the interrupt vector. */
+void do_timer_interrupt(struct pt_regs *regs, int fault_num)
+{
+	struct pt_regs *old_regs = set_irq_regs(regs);
+	struct clock_event_device *evt = &__get_cpu_var(tile_timer);
+
+	/*
+	 * Mask the timer interrupt here, since we are a oneshot timer
+	 * and there are now by definition no events pending.
+	 */
+	raw_local_irq_mask(INT_TILE_TIMER);
+
+	/* Track time spent here in an interrupt context */
+	irq_enter();
+
+	/* Track interrupt count. */
+	__get_cpu_var(irq_stat).irq_timer_count++;
+
+	/* Call the generic timer handler */
+	evt->event_handler(evt);
+
+	/*
+	 * Track time spent against the current process again and
+	 * process any softirqs if they are waiting.
+	 */
+	irq_exit();
+
+	set_irq_regs(old_regs);
+}
+
+/*
+ * Scheduler clock - returns current time in nanosec units.
+ * Note that with LOCKDEP, this is called during lockdep_init(), and
+ * we will claim that sched_clock() is zero for a little while, until
+ * we run setup_clock(), above.
+ */
+unsigned long long sched_clock(void)
+{
+	return clocksource_cyc2ns(get_cycles(),
+				  cycle_counter_cs.mult,
+				  cycle_counter_cs.shift);
+}
+
+int setup_profiling_timer(unsigned int multiplier)
+{
+	return -EINVAL;
+}
diff --git a/arch/tile/kernel/tlb.c b/arch/tile/kernel/tlb.c
new file mode 100644
index 0000000..2dffc10
--- /dev/null
+++ b/arch/tile/kernel/tlb.c
@@ -0,0 +1,97 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ */
+
+#include <linux/cpumask.h>
+#include <linux/module.h>
+#include <asm/tlbflush.h>
+#include <asm/homecache.h>
+#include <hv/hypervisor.h>
+
+/* From tlbflush.h */
+DEFINE_PER_CPU(int, current_asid);
+int min_asid, max_asid;
+
+/*
+ * Note that we flush the L1I (for VM_EXEC pages) as well as the TLB
+ * so that when we are unmapping an executable page, we also flush it.
+ * Combined with flushing the L1I at context switch time, this means
+ * we don't have to do any other icache flushes.
+ */
+
+void flush_tlb_mm(struct mm_struct *mm)
+{
+	HV_Remote_ASID asids[NR_CPUS];
+	int i = 0, cpu;
+	for_each_cpu(cpu, &mm->cpu_vm_mask) {
+		HV_Remote_ASID *asid = &asids[i++];
+		asid->y = cpu / smp_topology.width;
+		asid->x = cpu % smp_topology.width;
+		asid->asid = per_cpu(current_asid, cpu);
+	}
+	flush_remote(0, HV_FLUSH_EVICT_L1I, &mm->cpu_vm_mask,
+		     0, 0, 0, NULL, asids, i);
+}
+
+void flush_tlb_current_task(void)
+{
+	flush_tlb_mm(current->mm);
+}
+
+void flush_tlb_page_mm(const struct vm_area_struct *vma, struct mm_struct *mm,
+		       unsigned long va)
+{
+	unsigned long size = hv_page_size(vma);
+	int cache = (vma->vm_flags & VM_EXEC) ? HV_FLUSH_EVICT_L1I : 0;
+	flush_remote(0, cache, &mm->cpu_vm_mask,
+		     va, size, size, &mm->cpu_vm_mask, NULL, 0);
+}
+
+void flush_tlb_page(const struct vm_area_struct *vma, unsigned long va)
+{
+	flush_tlb_page_mm(vma, vma->vm_mm, va);
+}
+EXPORT_SYMBOL(flush_tlb_page);
+
+void flush_tlb_range(const struct vm_area_struct *vma,
+		     unsigned long start, unsigned long end)
+{
+	unsigned long size = hv_page_size(vma);
+	struct mm_struct *mm = vma->vm_mm;
+	int cache = (vma->vm_flags & VM_EXEC) ? HV_FLUSH_EVICT_L1I : 0;
+	flush_remote(0, cache, &mm->cpu_vm_mask, start, end - start, size,
+		     &mm->cpu_vm_mask, NULL, 0);
+}
+
+void flush_tlb_all(void)
+{
+	int i;
+	for (i = 0; ; ++i) {
+		HV_VirtAddrRange r = hv_inquire_virtual(i);
+		if (r.size == 0)
+			break;
+		flush_remote(0, HV_FLUSH_EVICT_L1I, cpu_online_mask,
+			     r.start, r.size, PAGE_SIZE, cpu_online_mask,
+			     NULL, 0);
+		flush_remote(0, 0, NULL,
+			     r.start, r.size, HPAGE_SIZE, cpu_online_mask,
+			     NULL, 0);
+	}
+}
+
+void flush_tlb_kernel_range(unsigned long start, unsigned long end)
+{
+	flush_remote(0, HV_FLUSH_EVICT_L1I, cpu_online_mask,
+		     start, end - start, PAGE_SIZE, cpu_online_mask, NULL, 0);
+}
diff --git a/arch/tile/kernel/traps.c b/arch/tile/kernel/traps.c
new file mode 100644
index 0000000..12cb10f
--- /dev/null
+++ b/arch/tile/kernel/traps.c
@@ -0,0 +1,237 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/kprobes.h>
+#include <linux/module.h>
+#include <linux/reboot.h>
+#include <linux/uaccess.h>
+#include <linux/ptrace.h>
+#include <asm/opcode-tile.h>
+
+#include <arch/interrupts.h>
+#include <arch/spr_def.h>
+
+void __init trap_init(void)
+{
+	/* Nothing needed here since we link code at .intrpt1 */
+}
+
+int unaligned_fixup = 1;
+
+static int __init setup_unaligned_fixup(char *str)
+{
+	/*
+	 * Say "=-1" to completely disable it.  If you just do "=0", we
+	 * will still parse the instruction, then fire a SIGBUS with
+	 * the correct address from inside the single_step code.
+	 */
+	long val;
+	if (strict_strtol(str, 0, &val) != 0)
+		return 0;
+	unaligned_fixup = val;
+	printk("Fixups for unaligned data accesses are %s\n",
+	       unaligned_fixup >= 0 ?
+	       (unaligned_fixup ? "enabled" : "disabled") :
+	       "completely disabled");
+	return 1;
+}
+__setup("unaligned_fixup=", setup_unaligned_fixup);
+
+#if CHIP_HAS_TILE_DMA()
+
+static int dma_disabled;
+
+static int __init nodma(char *str)
+{
+	printk("User-space DMA is disabled\n");
+	dma_disabled = 1;
+	return 1;
+}
+__setup("nodma", nodma);
+
+/* How to decode SPR_GPV_REASON */
+#define IRET_ERROR (1U << 31)
+#define MT_ERROR   (1U << 30)
+#define MF_ERROR   (1U << 29)
+#define SPR_INDEX  ((1U << 15) - 1)
+#define SPR_MPL_SHIFT  9  /* starting bit position for MPL encoded in SPR */
+
+/*
+ * See if this GPV is just to notify the kernel of SPR use and we can
+ * retry the user instruction after adjusting some MPLs suitably.
+ */
+static int retry_gpv(unsigned int gpv_reason)
+{
+	int mpl;
+
+	if (gpv_reason & IRET_ERROR)
+		return 0;
+
+	BUG_ON((gpv_reason & (MT_ERROR|MF_ERROR)) == 0);
+	mpl = (gpv_reason & SPR_INDEX) >> SPR_MPL_SHIFT;
+	if (mpl == INT_DMA_NOTIFY && !dma_disabled) {
+		/* User is turning on DMA. Allow it and retry. */
+		printk(KERN_DEBUG "Process %d/%s is now enabled for DMA\n",
+		       current->pid, current->comm);
+		BUG_ON(current->thread.tile_dma_state.enabled);
+		current->thread.tile_dma_state.enabled = 1;
+		grant_dma_mpls();
+		return 1;
+	}
+
+	return 0;
+}
+
+#endif /* CHIP_HAS_TILE_DMA() */
+
+/* Defined inside do_trap(), below. */
+#ifdef __tilegx__
+extern tilegx_bundle_bits bpt_code;
+#else
+extern tile_bundle_bits bpt_code;
+#endif
+
+void __kprobes do_trap(struct pt_regs *regs, int fault_num,
+		       unsigned long reason)
+{
+	siginfo_t info = { 0 };
+	int signo, code;
+	unsigned long address;
+	__typeof__(bpt_code) instr;
+
+	/* Re-enable interrupts. */
+	local_irq_enable();
+
+	/*
+	 * If it hits in kernel mode and we can't fix it up, just exit the
+	 * current process and hope for the best.
+	 */
+	if (!user_mode(regs)) {
+		if (fixup_exception(regs))  /* only UNALIGN_DATA in practice */
+			return;
+		printk(KERN_ALERT "Kernel took bad trap %d at PC %#lx\n",
+		       fault_num, regs->pc);
+		if (fault_num == INT_GPV)
+			printk(KERN_ALERT "GPV_REASON is %#lx\n", reason);
+		show_regs(regs);
+		do_exit(SIGKILL);  /* FIXME: implement i386 die() */
+		return;
+	}
+
+	switch (fault_num) {
+	case INT_ILL:
+		asm(".pushsection .rodata.bpt_code,\"a\";"
+		    ".align 8;"
+		    "bpt_code: bpt;"
+		    ".size bpt_code,.-bpt_code;"
+		    ".popsection");
+
+		if (copy_from_user(&instr, (void *)regs->pc, sizeof(instr))) {
+			printk(KERN_ERR "Unreadable instruction for INT_ILL:"
+			       " %#lx\n", regs->pc);
+			do_exit(SIGKILL);
+			return;
+		}
+		if (instr == bpt_code) {
+			signo = SIGTRAP;
+			code = TRAP_BRKPT;
+		} else {
+			signo = SIGILL;
+			code = ILL_ILLOPC;
+		}
+		address = regs->pc;
+		break;
+	case INT_GPV:
+#if CHIP_HAS_TILE_DMA()
+		if (retry_gpv(reason))
+			return;
+#endif
+		/*FALLTHROUGH*/
+	case INT_UDN_ACCESS:
+	case INT_IDN_ACCESS:
+#if CHIP_HAS_SN()
+	case INT_SN_ACCESS:
+#endif
+		signo = SIGILL;
+		code = ILL_PRVREG;
+		address = regs->pc;
+		break;
+	case INT_SWINT_3:
+	case INT_SWINT_2:
+	case INT_SWINT_0:
+		signo = SIGILL;
+		code = ILL_ILLTRP;
+		address = regs->pc;
+		break;
+	case INT_UNALIGN_DATA:
+#ifndef __tilegx__  /* FIXME: GX: no single-step yet */
+		if (unaligned_fixup >= 0) {
+			struct single_step_state *state =
+				current_thread_info()->step_state;
+			if (!state || (void *)(regs->pc) != state->buffer) {
+				single_step_once(regs);
+				return;
+			}
+		}
+#endif
+		signo = SIGBUS;
+		code = BUS_ADRALN;
+		address = 0;
+		break;
+	case INT_DOUBLE_FAULT:
+		/*
+		 * For double fault, "reason" is actually passed as
+		 * SYSTEM_SAVE_1_2, the hypervisor's double-fault info, so
+		 * we can provide the original fault number rather than
+		 * the uninteresting "INT_DOUBLE_FAULT" so the user can
+		 * learn what actually struck while PL0 ICS was set.
+		 */
+		fault_num = reason;
+		signo = SIGILL;
+		code = ILL_DBLFLT;
+		address = regs->pc;
+		break;
+#ifdef __tilegx__
+	case INT_ILL_TRANS:
+		signo = SIGSEGV;
+		code = SEGV_MAPERR;
+		if (reason & SPR_ILL_TRANS_REASON__I_STREAM_VA_RMASK)
+			address = regs->pc;
+		else
+			address = 0;  /* FIXME: GX: single-step for address */
+		break;
+#endif
+	default:
+		panic("Unexpected do_trap interrupt number %d", fault_num);
+		return;
+	}
+
+	info.si_signo = signo;
+	info.si_code = code;
+	info.si_addr = (void *)address;
+	if (signo == SIGILL)
+		info.si_trapno = fault_num;
+	force_sig_info(signo, &info, current);
+}
+
+extern void _dump_stack(int dummy, ulong pc, ulong lr, ulong sp, ulong r52);
+
+void kernel_double_fault(int dummy, ulong pc, ulong lr, ulong sp, ulong r52)
+{
+	_dump_stack(dummy, pc, lr, sp, r52);
+	printk("Double fault: exiting\n");
+	machine_halt();
+}
diff --git a/arch/tile/kernel/vmlinux.lds.S b/arch/tile/kernel/vmlinux.lds.S
new file mode 100644
index 0000000..77388c1
--- /dev/null
+++ b/arch/tile/kernel/vmlinux.lds.S
@@ -0,0 +1,98 @@
+#include <asm-generic/vmlinux.lds.h>
+#include <asm/page.h>
+#include <asm/cache.h>
+#include <asm/thread_info.h>
+#include <hv/hypervisor.h>
+
+/* Text loads starting from the supervisor interrupt vector address. */
+#define TEXT_OFFSET MEM_SV_INTRPT
+
+OUTPUT_ARCH(tile)
+ENTRY(_start)
+jiffies = jiffies_64;
+
+PHDRS
+{
+  intrpt1 PT_LOAD ;
+  text PT_LOAD ;
+  data PT_LOAD ;
+}
+SECTIONS
+{
+  /* Text is loaded with a different VA than data; start with text. */
+  #undef LOAD_OFFSET
+  #define LOAD_OFFSET TEXT_OFFSET
+
+  /* Interrupt vectors */
+  .intrpt1 (LOAD_OFFSET) : AT ( 0 )   /* put at the start of physical memory */
+  {
+    _text = .;
+    _stext = .;
+    *(.intrpt1)
+  } :intrpt1 =0
+
+  /* Hypervisor call vectors */
+  #include "hvglue.lds"
+
+  /* Now the real code */
+  . = ALIGN(0x20000);
+  HEAD_TEXT_SECTION :text =0
+  .text : AT (ADDR(.text) - LOAD_OFFSET) {
+    SCHED_TEXT
+    LOCK_TEXT
+    __fix_text_end = .;   /* tile-cpack won't rearrange before this */
+    TEXT_TEXT
+    *(.text.*)
+    *(.coldtext*)
+    *(.fixup)
+    *(.gnu.warning)
+  }
+  _etext = .;
+
+  /* "Init" is divided into two areas with very different virtual addresses. */
+  INIT_TEXT_SECTION(PAGE_SIZE)
+
+  /* Now we skip back to PAGE_OFFSET for the data. */
+  . = (. - TEXT_OFFSET + PAGE_OFFSET);
+  #undef LOAD_OFFSET
+  #define LOAD_OFFSET PAGE_OFFSET
+
+  . = ALIGN(PAGE_SIZE);
+  VMLINUX_SYMBOL(_sinitdata) = .;
+  .init.page : AT (ADDR(.init.page) - LOAD_OFFSET) {
+    *(.init.page)
+  } :data =0
+  INIT_DATA_SECTION(16)
+  PERCPU(PAGE_SIZE)
+  . = ALIGN(PAGE_SIZE);
+  VMLINUX_SYMBOL(_einitdata) = .;
+
+  _sdata = .;                   /* Start of data section */
+
+  RO_DATA_SECTION(PAGE_SIZE)
+
+  /* initially writeable, then read-only */
+  . = ALIGN(PAGE_SIZE);
+  __w1data_begin = .;
+  .w1data : AT(ADDR(.w1data) - LOAD_OFFSET) {
+    VMLINUX_SYMBOL(__w1data_begin) = .;
+    *(.w1data)
+    VMLINUX_SYMBOL(__w1data_end) = .;
+  }
+
+  RW_DATA_SECTION(L2_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
+
+  _edata = .;
+
+  EXCEPTION_TABLE(L2_CACHE_BYTES)
+  NOTES
+
+
+  BSS_SECTION(8, PAGE_SIZE, 1)
+  _end = . ;
+
+  STABS_DEBUG
+  DWARF_DEBUG
+
+  DISCARDS
+}
diff --git a/arch/tile/lib/Makefile b/arch/tile/lib/Makefile
new file mode 100644
index 0000000..ea9c209
--- /dev/null
+++ b/arch/tile/lib/Makefile
@@ -0,0 +1,16 @@
+#
+# Makefile for TILE-specific library files..
+#
+
+lib-y = checksum.o cpumask.o delay.o __invalidate_icache.o \
+	mb_incoherent.o uaccess.o \
+	memcpy_$(BITS).o memchr_$(BITS).o memmove_$(BITS).o memset_$(BITS).o \
+	strchr_$(BITS).o strlen_$(BITS).o
+
+ifneq ($(CONFIG_TILEGX),y)
+lib-y += atomic_32.o atomic_asm_32.o memcpy_tile64.o
+endif
+
+lib-$(CONFIG_SMP) += spinlock_$(BITS).o usercopy_$(BITS).o
+
+obj-$(CONFIG_MODULES) += exports.o
diff --git a/arch/tile/lib/__invalidate_icache.S b/arch/tile/lib/__invalidate_icache.S
new file mode 100644
index 0000000..92e7050
--- /dev/null
+++ b/arch/tile/lib/__invalidate_icache.S
@@ -0,0 +1,106 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ * A routine for synchronizing the instruction and data caches.
+ * Useful for self-modifying code.
+ *
+ * r0 holds the buffer address
+ * r1 holds the size in bytes
+ */
+
+#include <arch/chip.h>
+#include <feedback.h>
+
+#if defined(__NEWLIB__) || defined(__BME__)
+#include <sys/page.h>
+#else
+#include <asm/page.h>
+#endif
+
+#ifdef __tilegx__
+/* Share code among Tile family chips but adjust opcodes appropriately. */
+#define slt cmpltu
+#define bbst blbst
+#define bnezt bnzt
+#endif
+
+#if defined(__tilegx__) && __SIZEOF_POINTER__ == 4
+/* Force 32-bit ops so pointers wrap around appropriately. */
+#define ADD_PTR addx
+#define ADDI_PTR addxi
+#else
+#define ADD_PTR add
+#define ADDI_PTR addi
+#endif
+
+        .section .text.__invalidate_icache, "ax"
+        .global __invalidate_icache
+        .type __invalidate_icache,@function
+        .hidden __invalidate_icache
+        .align 8
+__invalidate_icache:
+        FEEDBACK_ENTER(__invalidate_icache)
+        {
+         ADD_PTR r1, r0, r1       /* end of buffer */
+         blez r1, .Lexit      /* skip out if size <= 0 */
+        }
+        {
+         ADDI_PTR r1, r1, -1      /* point to last byte to flush */
+         andi r0, r0, -CHIP_L1I_LINE_SIZE()  /* align to cache-line size */
+        }
+        {
+         andi r1, r1, -CHIP_L1I_LINE_SIZE()  /* last cache line to flush */
+         mf
+        }
+#if CHIP_L1I_CACHE_SIZE() > PAGE_SIZE
+        {
+         moveli r4, CHIP_L1I_CACHE_SIZE() / PAGE_SIZE  /* loop counter */
+         move r2, r0          /* remember starting address */
+        }
+#endif
+        drain
+	{
+         slt r3, r0, r1       /* set up loop invariant */
+#if CHIP_L1I_CACHE_SIZE() > PAGE_SIZE
+	 moveli r6, PAGE_SIZE
+#endif
+	}
+.Lentry:
+        {
+         icoh r0
+         ADDI_PTR r0, r0, CHIP_L1I_LINE_SIZE()   /* advance buffer */
+        }
+        {
+         slt r3, r0, r1       /* check if buffer < buffer + size */
+         bbst r3, .Lentry     /* loop if buffer < buffer + size */
+        }
+#if CHIP_L1I_CACHE_SIZE() > PAGE_SIZE
+        {
+         ADD_PTR r2, r2, r6
+         ADD_PTR r1, r1, r6
+        }
+	{
+         move r0, r2
+         addi r4, r4, -1
+	}
+	{
+         slt r3, r0, r1        /* set up loop invariant */
+         bnezt r4, .Lentry
+	}
+#endif
+        drain
+.Lexit:
+        jrp lr
+
+.Lend___invalidate_icache:
+        .size __invalidate_icache, \
+		.Lend___invalidate_icache - __invalidate_icache
diff --git a/arch/tile/lib/atomic_32.c b/arch/tile/lib/atomic_32.c
new file mode 100644
index 0000000..be1e8ac
--- /dev/null
+++ b/arch/tile/lib/atomic_32.c
@@ -0,0 +1,347 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#include <linux/cache.h>
+#include <linux/delay.h>
+#include <linux/uaccess.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <asm/atomic.h>
+#include <arch/chip.h>
+
+/* The routines in atomic_asm.S are private, so we only declare them here. */
+extern struct __get_user __atomic_cmpxchg(volatile int *p,
+					  int *lock, int o, int n);
+extern struct __get_user __atomic_xchg(volatile int *p, int *lock, int n);
+extern struct __get_user __atomic_xchg_add(volatile int *p, int *lock, int n);
+extern struct __get_user __atomic_xchg_add_unless(volatile int *p,
+						  int *lock, int o, int n);
+extern struct __get_user __atomic_or(volatile int *p, int *lock, int n);
+extern struct __get_user __atomic_andn(volatile int *p, int *lock, int n);
+extern struct __get_user __atomic_xor(volatile int *p, int *lock, int n);
+
+extern u64 __atomic64_cmpxchg(volatile u64 *p, int *lock, u64 o, u64 n);
+extern u64 __atomic64_xchg(volatile u64 *p, int *lock, u64 n);
+extern u64 __atomic64_xchg_add(volatile u64 *p, int *lock, u64 n);
+extern u64 __atomic64_xchg_add_unless(volatile u64 *p,
+				      int *lock, u64 o, u64 n);
+
+
+/* See <asm/atomic.h> */
+#if ATOMIC_LOCKS_FOUND_VIA_TABLE()
+
+/*
+ * A block of memory containing locks for atomic ops. Each instance of this
+ * struct will be homed on a different CPU.
+ */
+struct atomic_locks_on_cpu {
+	int lock[ATOMIC_HASH_L2_SIZE];
+} __attribute__((aligned(ATOMIC_HASH_L2_SIZE * 4)));
+
+static DEFINE_PER_CPU(struct atomic_locks_on_cpu, atomic_lock_pool);
+
+/* The locks we'll use until __init_atomic_per_cpu is called. */
+static struct atomic_locks_on_cpu __initdata initial_atomic_locks;
+
+/* Hash into this vector to get a pointer to lock for the given atomic. */
+struct atomic_locks_on_cpu *atomic_lock_ptr[ATOMIC_HASH_L1_SIZE]
+	__write_once = {
+	[0 ... ATOMIC_HASH_L1_SIZE-1] (&initial_atomic_locks)
+};
+
+#else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
+
+/* This page is remapped on startup to be hash-for-home. */
+int atomic_locks[PAGE_SIZE / sizeof(int) /* Only ATOMIC_HASH_SIZE is used */]
+  __attribute__((aligned(PAGE_SIZE), section(".bss.page_aligned")));
+
+#endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
+
+static inline int *__atomic_hashed_lock(volatile void *v)
+{
+	/* NOTE: this code must match "sys_cmpxchg" in kernel/intvec.S */
+#if ATOMIC_LOCKS_FOUND_VIA_TABLE()
+	unsigned long i =
+		(unsigned long) v & ((PAGE_SIZE-1) & -sizeof(long long));
+	unsigned long n = __insn_crc32_32(0, i);
+
+	/* Grab high bits for L1 index. */
+	unsigned long l1_index = n >> ((sizeof(n) * 8) - ATOMIC_HASH_L1_SHIFT);
+	/* Grab low bits for L2 index. */
+	unsigned long l2_index = n & (ATOMIC_HASH_L2_SIZE - 1);
+
+	return &atomic_lock_ptr[l1_index]->lock[l2_index];
+#else
+	/*
+	 * Use bits [3, 3 + ATOMIC_HASH_SHIFT) as the lock index.
+	 * Using mm works here because atomic_locks is page aligned.
+	 */
+	unsigned long ptr = __insn_mm((unsigned long)v >> 1,
+				      (unsigned long)atomic_locks,
+				      2, (ATOMIC_HASH_SHIFT + 2) - 1);
+	return (int *)ptr;
+#endif
+}
+
+#ifdef CONFIG_SMP
+/* Return whether the passed pointer is a valid atomic lock pointer. */
+static int is_atomic_lock(int *p)
+{
+#if ATOMIC_LOCKS_FOUND_VIA_TABLE()
+	int i;
+	for (i = 0; i < ATOMIC_HASH_L1_SIZE; ++i) {
+
+		if (p >= &atomic_lock_ptr[i]->lock[0] &&
+		    p < &atomic_lock_ptr[i]->lock[ATOMIC_HASH_L2_SIZE]) {
+			return 1;
+		}
+	}
+	return 0;
+#else
+	return p >= &atomic_locks[0] && p < &atomic_locks[ATOMIC_HASH_SIZE];
+#endif
+}
+
+void __atomic_fault_unlock(int *irqlock_word)
+{
+	BUG_ON(!is_atomic_lock(irqlock_word));
+	BUG_ON(*irqlock_word != 1);
+	*irqlock_word = 0;
+}
+
+#endif /* CONFIG_SMP */
+
+static inline int *__atomic_setup(volatile void *v)
+{
+	/* Issue a load to the target to bring it into cache. */
+	*(volatile int *)v;
+	return __atomic_hashed_lock(v);
+}
+
+int _atomic_xchg(atomic_t *v, int n)
+{
+	return __atomic_xchg(&v->counter, __atomic_setup(v), n).val;
+}
+EXPORT_SYMBOL(_atomic_xchg);
+
+int _atomic_xchg_add(atomic_t *v, int i)
+{
+	return __atomic_xchg_add(&v->counter, __atomic_setup(v), i).val;
+}
+EXPORT_SYMBOL(_atomic_xchg_add);
+
+int _atomic_xchg_add_unless(atomic_t *v, int a, int u)
+{
+	/*
+	 * Note: argument order is switched here since it is easier
+	 * to use the first argument consistently as the "old value"
+	 * in the assembly, as is done for _atomic_cmpxchg().
+	 */
+	return __atomic_xchg_add_unless(&v->counter, __atomic_setup(v), u, a)
+		.val;
+}
+EXPORT_SYMBOL(_atomic_xchg_add_unless);
+
+int _atomic_cmpxchg(atomic_t *v, int o, int n)
+{
+	return __atomic_cmpxchg(&v->counter, __atomic_setup(v), o, n).val;
+}
+EXPORT_SYMBOL(_atomic_cmpxchg);
+
+unsigned long _atomic_or(volatile unsigned long *p, unsigned long mask)
+{
+	return __atomic_or((int *)p, __atomic_setup(p), mask).val;
+}
+EXPORT_SYMBOL(_atomic_or);
+
+unsigned long _atomic_andn(volatile unsigned long *p, unsigned long mask)
+{
+	return __atomic_andn((int *)p, __atomic_setup(p), mask).val;
+}
+EXPORT_SYMBOL(_atomic_andn);
+
+unsigned long _atomic_xor(volatile unsigned long *p, unsigned long mask)
+{
+	return __atomic_xor((int *)p, __atomic_setup(p), mask).val;
+}
+EXPORT_SYMBOL(_atomic_xor);
+
+
+u64 _atomic64_xchg(atomic64_t *v, u64 n)
+{
+	return __atomic64_xchg(&v->counter, __atomic_setup(v), n);
+}
+EXPORT_SYMBOL(_atomic64_xchg);
+
+u64 _atomic64_xchg_add(atomic64_t *v, u64 i)
+{
+	return __atomic64_xchg_add(&v->counter, __atomic_setup(v), i);
+}
+EXPORT_SYMBOL(_atomic64_xchg_add);
+
+u64 _atomic64_xchg_add_unless(atomic64_t *v, u64 a, u64 u)
+{
+	/*
+	 * Note: argument order is switched here since it is easier
+	 * to use the first argument consistently as the "old value"
+	 * in the assembly, as is done for _atomic_cmpxchg().
+	 */
+	return __atomic64_xchg_add_unless(&v->counter, __atomic_setup(v),
+					  u, a);
+}
+EXPORT_SYMBOL(_atomic64_xchg_add_unless);
+
+u64 _atomic64_cmpxchg(atomic64_t *v, u64 o, u64 n)
+{
+	return __atomic64_cmpxchg(&v->counter, __atomic_setup(v), o, n);
+}
+EXPORT_SYMBOL(_atomic64_cmpxchg);
+
+
+static inline int *__futex_setup(__user int *v)
+{
+	/*
+	 * Issue a prefetch to the counter to bring it into cache.
+	 * As for __atomic_setup, but we can't do a read into the L1
+	 * since it might fault; instead we do a prefetch into the L2.
+	 */
+	__insn_prefetch(v);
+	return __atomic_hashed_lock(v);
+}
+
+struct __get_user futex_set(int *v, int i)
+{
+	return __atomic_xchg(v, __futex_setup(v), i);
+}
+
+struct __get_user futex_add(int *v, int n)
+{
+	return __atomic_xchg_add(v, __futex_setup(v), n);
+}
+
+struct __get_user futex_or(int *v, int n)
+{
+	return __atomic_or(v, __futex_setup(v), n);
+}
+
+struct __get_user futex_andn(int *v, int n)
+{
+	return __atomic_andn(v, __futex_setup(v), n);
+}
+
+struct __get_user futex_xor(int *v, int n)
+{
+	return __atomic_xor(v, __futex_setup(v), n);
+}
+
+struct __get_user futex_cmpxchg(int *v, int o, int n)
+{
+	return __atomic_cmpxchg(v, __futex_setup(v), o, n);
+}
+
+/*
+ * If any of the atomic or futex routines hit a bad address (not in
+ * the page tables at kernel PL) this routine is called.  The futex
+ * routines are never used on kernel space, and the normal atomics and
+ * bitops are never used on user space.  So a fault on kernel space
+ * must be fatal, but a fault on userspace is a futex fault and we
+ * need to return -EFAULT.  Note that the context this routine is
+ * invoked in is the context of the "_atomic_xxx()" routines called
+ * by the functions in this file.
+ */
+struct __get_user __atomic_bad_address(int *addr)
+{
+	if (unlikely(!access_ok(VERIFY_WRITE, addr, sizeof(int))))
+		panic("Bad address used for kernel atomic op: %p\n", addr);
+	return (struct __get_user) { .err = -EFAULT };
+}
+
+
+#if CHIP_HAS_CBOX_HOME_MAP()
+static int __init noatomichash(char *str)
+{
+	printk("noatomichash is deprecated.\n");
+	return 1;
+}
+__setup("noatomichash", noatomichash);
+#endif
+
+void __init __init_atomic_per_cpu(void)
+{
+#if ATOMIC_LOCKS_FOUND_VIA_TABLE()
+
+	unsigned int i;
+	int actual_cpu;
+
+	/*
+	 * Before this is called from setup, we just have one lock for
+	 * all atomic objects/operations.  Here we replace the
+	 * elements of atomic_lock_ptr so that they point at per_cpu
+	 * integers.  This seemingly over-complex approach stems from
+	 * the fact that DEFINE_PER_CPU defines an entry for each cpu
+	 * in the grid, not each cpu from 0..ATOMIC_HASH_SIZE-1.  But
+	 * for efficient hashing of atomics to their locks we want a
+	 * compile time constant power of 2 for the size of this
+	 * table, so we use ATOMIC_HASH_SIZE.
+	 *
+	 * Here we populate atomic_lock_ptr from the per cpu
+	 * atomic_lock_pool, interspersing by actual cpu so that
+	 * subsequent elements are homed on consecutive cpus.
+	 */
+
+	actual_cpu = cpumask_first(cpu_possible_mask);
+
+	for (i = 0; i < ATOMIC_HASH_L1_SIZE; ++i) {
+		/*
+		 * Preincrement to slightly bias against using cpu 0,
+		 * which has plenty of stuff homed on it already.
+		 */
+		actual_cpu = cpumask_next(actual_cpu, cpu_possible_mask);
+		if (actual_cpu >= nr_cpu_ids)
+			actual_cpu = cpumask_first(cpu_possible_mask);
+
+		atomic_lock_ptr[i] = &per_cpu(atomic_lock_pool, actual_cpu);
+	}
+
+#else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
+
+	/* Validate power-of-two and "bigger than cpus" assumption */
+	BUG_ON(ATOMIC_HASH_SIZE & (ATOMIC_HASH_SIZE-1));
+	BUG_ON(ATOMIC_HASH_SIZE < nr_cpu_ids);
+
+	/*
+	 * On TILEPro we prefer to use a single hash-for-home
+	 * page, since this means atomic operations are less
+	 * likely to encounter a TLB fault and thus should
+	 * in general perform faster.  You may wish to disable
+	 * this in situations where few hash-for-home tiles
+	 * are configured.
+	 */
+	BUG_ON((unsigned long)atomic_locks % PAGE_SIZE != 0);
+
+	/* The locks must all fit on one page. */
+	BUG_ON(ATOMIC_HASH_SIZE * sizeof(int) > PAGE_SIZE);
+
+	/*
+	 * We use the page offset of the atomic value's address as
+	 * an index into atomic_locks, excluding the low 3 bits.
+	 * That should not produce more indices than ATOMIC_HASH_SIZE.
+	 */
+	BUG_ON((PAGE_SIZE >> 3) > ATOMIC_HASH_SIZE);
+
+#endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
+
+	/* The futex code makes this assumption, so we validate it here. */
+	BUG_ON(sizeof(atomic_t) != sizeof(int));
+}
diff --git a/arch/tile/lib/atomic_asm_32.S b/arch/tile/lib/atomic_asm_32.S
new file mode 100644
index 0000000..c0d0585
--- /dev/null
+++ b/arch/tile/lib/atomic_asm_32.S
@@ -0,0 +1,197 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ * Support routines for atomic operations.  Each function takes:
+ *
+ * r0: address to manipulate
+ * r1: pointer to atomic lock guarding this operation (for FUTEX_LOCK_REG)
+ * r2: new value to write, or for cmpxchg/add_unless, value to compare against
+ * r3: (cmpxchg/xchg_add_unless) new value to write or add;
+ *     (atomic64 ops) high word of value to write
+ * r4/r5: (cmpxchg64/add_unless64) new value to write or add
+ *
+ * The 32-bit routines return a "struct __get_user" so that the futex code
+ * has an opportunity to return -EFAULT to the user if needed.
+ * The 64-bit routines just return a "long long" with the value,
+ * since they are only used from kernel space and don't expect to fault.
+ * Support for 16-bit ops is included in the framework but we don't provide
+ * any (x86_64 has an atomic_inc_short(), so we might want to some day).
+ *
+ * Note that the caller is advised to issue a suitable L1 or L2
+ * prefetch on the address being manipulated to avoid extra stalls.
+ * In addition, the hot path is on two icache lines, and we start with
+ * a jump to the second line to make sure they are both in cache so
+ * that we never stall waiting on icache fill while holding the lock.
+ * (This doesn't work out with most 64-bit ops, since they consume
+ * too many bundles, so may take an extra i-cache stall.)
+ *
+ * These routines set the INTERRUPT_CRITICAL_SECTION bit, just
+ * like sys_cmpxchg(), so that NMIs like PERF_COUNT will not interrupt
+ * the code, just page faults.
+ *
+ * If the load or store faults in a way that can be directly fixed in
+ * the do_page_fault_ics() handler (e.g. a vmalloc reference) we fix it
+ * directly, return to the instruction that faulted, and retry it.
+ *
+ * If the load or store faults in a way that potentially requires us
+ * to release the atomic lock, then retry (e.g. a migrating PTE), we
+ * reset the PC in do_page_fault_ics() to the "tns" instruction so
+ * that on return we will reacquire the lock and restart the op.  We
+ * are somewhat overloading the exception_table_entry notion by doing
+ * this, since those entries are not normally used for migrating PTEs.
+ *
+ * If the main page fault handler discovers a bad address, it will see
+ * the PC pointing to the "tns" instruction (due to the earlier
+ * exception_table_entry processing in do_page_fault_ics), and
+ * re-reset the PC to the fault handler, atomic_bad_address(), which
+ * effectively takes over from the atomic op and can either return a
+ * bad "struct __get_user" (for user addresses) or can just panic (for
+ * bad kernel addresses).
+ *
+ * Note that if the value we would store is the same as what we
+ * loaded, we bypass the load.  Other platforms with true atomics can
+ * make the guarantee that a non-atomic __clear_bit(), for example,
+ * can safely race with an atomic test_and_set_bit(); this example is
+ * from bit_spinlock.h in slub_lock() / slub_unlock().  We can't do
+ * that on Tile since the "atomic" op is really just a
+ * read/modify/write, and can race with the non-atomic
+ * read/modify/write.  However, if we can short-circuit the write when
+ * it is not needed, in the atomic case, we avoid the race.
+ */
+
+#include <linux/linkage.h>
+#include <asm/atomic.h>
+#include <asm/page.h>
+#include <asm/processor.h>
+
+	.section .text.atomic,"ax"
+ENTRY(__start_atomic_asm_code)
+
+	.macro  atomic_op, name, bitwidth, body
+	.align  64
+STD_ENTRY_SECTION(__atomic\name, .text.atomic)
+	{
+	 movei  r24, 1
+	 j      4f		/* branch to second cache line */
+	}
+1:	{
+	 .ifc \bitwidth,16
+	 lh     r22, r0
+	 .else
+	 lw     r22, r0
+	 addi   r23, r0, 4
+	 .endif
+	}
+	.ifc \bitwidth,64
+	lw      r23, r23
+	.endif
+	\body /* set r24, and r25 if 64-bit */
+	{
+	 seq    r26, r22, r24
+	 seq    r27, r23, r25
+	}
+	.ifc \bitwidth,64
+	bbnst   r27, 2f
+	.endif
+	bbs     r26, 3f		/* skip write-back if it's the same value */
+2:	{
+	 .ifc \bitwidth,16
+	 sh     r0, r24
+	 .else
+	 sw     r0, r24
+	 addi   r23, r0, 4
+	 .endif
+	}
+	.ifc \bitwidth,64
+	sw      r23, r25
+	.endif
+	mf
+3:	{
+	 move   r0, r22
+	 .ifc \bitwidth,64
+	 move   r1, r23
+	 .else
+	 move   r1, zero
+	 .endif
+	 sw     ATOMIC_LOCK_REG_NAME, zero
+	}
+	mtspr   INTERRUPT_CRITICAL_SECTION, zero
+	jrp     lr
+4:	{
+	 move   ATOMIC_LOCK_REG_NAME, r1
+	 mtspr  INTERRUPT_CRITICAL_SECTION, r24
+	}
+#ifndef CONFIG_SMP
+	j       1b		/* no atomic locks */
+#else
+	{
+	 tns    r21, ATOMIC_LOCK_REG_NAME
+	 moveli r23, 2048       /* maximum backoff time in cycles */
+	}
+	{
+	 bzt    r21, 1b		/* branch if lock acquired */
+	 moveli r25, 32         /* starting backoff time in cycles */
+	}
+5:	mtspr   INTERRUPT_CRITICAL_SECTION, zero
+	mfspr   r26, CYCLE_LOW  /* get start point for this backoff */
+6:	mfspr   r22, CYCLE_LOW  /* test to see if we've backed off enough */
+	sub     r22, r22, r26
+	slt     r22, r22, r25
+	bbst    r22, 6b
+	{
+	 mtspr  INTERRUPT_CRITICAL_SECTION, r24
+	 shli   r25, r25, 1     /* double the backoff; retry the tns */
+	}
+	{
+	 tns    r21, ATOMIC_LOCK_REG_NAME
+	 slt    r26, r23, r25   /* is the proposed backoff too big? */
+	}
+	{
+	 bzt    r21, 1b		/* branch if lock acquired */
+	 mvnz   r25, r26, r23
+	}
+	j       5b
+#endif
+	STD_ENDPROC(__atomic\name)
+	.ifc \bitwidth,32
+	.pushsection __ex_table,"a"
+	.word   1b, __atomic\name
+	.word   2b, __atomic\name
+	.word   __atomic\name, __atomic_bad_address
+	.popsection
+	.endif
+	.endm
+
+atomic_op _cmpxchg, 32, "seq r26, r22, r2; { bbns r26, 3f; move r24, r3 }"
+atomic_op _xchg, 32, "move r24, r2"
+atomic_op _xchg_add, 32, "add r24, r22, r2"
+atomic_op _xchg_add_unless, 32, \
+	"sne r26, r22, r2; { bbns r26, 3f; add r24, r22, r3 }"
+atomic_op _or, 32, "or r24, r22, r2"
+atomic_op _andn, 32, "nor r2, r2, zero; and r24, r22, r2"
+atomic_op _xor, 32, "xor r24, r22, r2"
+
+atomic_op 64_cmpxchg, 64, "{ seq r26, r22, r2; seq r27, r23, r3 }; \
+	{ bbns r26, 3f; move r24, r4 }; { bbns r27, 3f; move r25, r5 }"
+atomic_op 64_xchg, 64, "{ move r24, r2; move r25, r3 }"
+atomic_op 64_xchg_add, 64, "{ add r24, r22, r2; add r25, r23, r3 }; \
+	slt_u r26, r24, r22; add r25, r25, r26"
+atomic_op 64_xchg_add_unless, 64, \
+	"{ sne r26, r22, r2; sne r27, r23, r3 }; \
+	{ bbns r26, 3f; add r24, r22, r4 }; \
+	{ bbns r27, 3f; add r25, r23, r5 }; \
+	slt_u r26, r24, r22; add r25, r25, r26"
+
+	jrp     lr              /* happy backtracer */
+
+ENTRY(__end_atomic_asm_code)
diff --git a/arch/tile/lib/checksum.c b/arch/tile/lib/checksum.c
new file mode 100644
index 0000000..e4bab5b
--- /dev/null
+++ b/arch/tile/lib/checksum.c
@@ -0,0 +1,102 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ * Support code for the main lib/checksum.c.
+ */
+
+#include <net/checksum.h>
+#include <linux/module.h>
+
+static inline unsigned int longto16(unsigned long x)
+{
+	unsigned long ret;
+#ifdef __tilegx__
+	ret = __insn_v2sadu(x, 0);
+	ret = __insn_v2sadu(ret, 0);
+#else
+	ret = __insn_sadh_u(x, 0);
+	ret = __insn_sadh_u(ret, 0);
+#endif
+	return ret;
+}
+
+__wsum do_csum(const unsigned char *buff, int len)
+{
+	int odd, count;
+	unsigned long result = 0;
+
+	if (len <= 0)
+		goto out;
+	odd = 1 & (unsigned long) buff;
+	if (odd) {
+		result = (*buff << 8);
+		len--;
+		buff++;
+	}
+	count = len >> 1;		/* nr of 16-bit words.. */
+	if (count) {
+		if (2 & (unsigned long) buff) {
+			result += *(const unsigned short *)buff;
+			count--;
+			len -= 2;
+			buff += 2;
+		}
+		count >>= 1;		/* nr of 32-bit words.. */
+		if (count) {
+#ifdef __tilegx__
+			if (4 & (unsigned long) buff) {
+				unsigned int w = *(const unsigned int *)buff;
+				result = __insn_v2sadau(result, w, 0);
+				count--;
+				len -= 4;
+				buff += 4;
+			}
+			count >>= 1;		/* nr of 64-bit words.. */
+#endif
+
+			/*
+			 * This algorithm could wrap around for very
+			 * large buffers, but those should be impossible.
+			 */
+			BUG_ON(count >= 65530);
+
+			while (count) {
+				unsigned long w = *(const unsigned long *)buff;
+				count--;
+				buff += sizeof(w);
+#ifdef __tilegx__
+				result = __insn_v2sadau(result, w, 0);
+#else
+				result = __insn_sadah_u(result, w, 0);
+#endif
+			}
+#ifdef __tilegx__
+			if (len & 4) {
+				unsigned int w = *(const unsigned int *)buff;
+				result = __insn_v2sadau(result, w, 0);
+				buff += 4;
+			}
+#endif
+		}
+		if (len & 2) {
+			result += *(const unsigned short *) buff;
+			buff += 2;
+		}
+	}
+	if (len & 1)
+		result += *buff;
+	result = longto16(result);
+	if (odd)
+		result = swab16(result);
+out:
+	return result;
+}
diff --git a/arch/tile/lib/cpumask.c b/arch/tile/lib/cpumask.c
new file mode 100644
index 0000000..af745b3
--- /dev/null
+++ b/arch/tile/lib/cpumask.c
@@ -0,0 +1,51 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#include <linux/cpumask.h>
+#include <linux/ctype.h>
+#include <linux/errno.h>
+
+/*
+ * Allow cropping out bits beyond the end of the array.
+ * Move to "lib" directory if more clients want to use this routine.
+ */
+int bitmap_parselist_crop(const char *bp, unsigned long *maskp, int nmaskbits)
+{
+	unsigned a, b;
+
+	bitmap_zero(maskp, nmaskbits);
+	do {
+		if (!isdigit(*bp))
+			return -EINVAL;
+		a = simple_strtoul(bp, (char **)&bp, 10);
+		b = a;
+		if (*bp == '-') {
+			bp++;
+			if (!isdigit(*bp))
+				return -EINVAL;
+			b = simple_strtoul(bp, (char **)&bp, 10);
+		}
+		if (!(a <= b))
+			return -EINVAL;
+		if (b >= nmaskbits)
+			b = nmaskbits-1;
+		while (a <= b) {
+			set_bit(a, maskp);
+			a++;
+		}
+		if (*bp == ',')
+			bp++;
+	} while (*bp != '\0' && *bp != '\n');
+	return 0;
+}
diff --git a/arch/tile/lib/delay.c b/arch/tile/lib/delay.c
new file mode 100644
index 0000000..5801b03
--- /dev/null
+++ b/arch/tile/lib/delay.c
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/thread_info.h>
+#include <asm/fixmap.h>
+#include <hv/hypervisor.h>
+
+void __udelay(unsigned long usecs)
+{
+	hv_nanosleep(usecs * 1000);
+}
+EXPORT_SYMBOL(__udelay);
+
+void __ndelay(unsigned long nsecs)
+{
+	hv_nanosleep(nsecs);
+}
+EXPORT_SYMBOL(__ndelay);
+
+/* FIXME: should be declared in a header somewhere. */
+EXPORT_SYMBOL(__delay);
diff --git a/arch/tile/lib/exports.c b/arch/tile/lib/exports.c
new file mode 100644
index 0000000..af8e70e
--- /dev/null
+++ b/arch/tile/lib/exports.c
@@ -0,0 +1,78 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ * Exports from assembler code and from libtile-cc.
+ */
+
+#include <linux/module.h>
+
+/* arch/tile/lib/usercopy.S */
+#include <linux/uaccess.h>
+EXPORT_SYMBOL(__get_user_1);
+EXPORT_SYMBOL(__get_user_2);
+EXPORT_SYMBOL(__get_user_4);
+EXPORT_SYMBOL(__put_user_1);
+EXPORT_SYMBOL(__put_user_2);
+EXPORT_SYMBOL(__put_user_4);
+EXPORT_SYMBOL(__put_user_8);
+EXPORT_SYMBOL(strnlen_user_asm);
+EXPORT_SYMBOL(strncpy_from_user_asm);
+EXPORT_SYMBOL(clear_user_asm);
+
+/* arch/tile/kernel/entry.S */
+#include <linux/kernel.h>
+#include <asm/processor.h>
+EXPORT_SYMBOL(current_text_addr);
+EXPORT_SYMBOL(dump_stack);
+
+/* arch/tile/lib/__memcpy.S */
+/* NOTE: on TILE64, these symbols appear in arch/tile/lib/memcpy_tile64.c */
+EXPORT_SYMBOL(memcpy);
+EXPORT_SYMBOL(__copy_to_user_inatomic);
+EXPORT_SYMBOL(__copy_from_user_inatomic);
+EXPORT_SYMBOL(__copy_from_user_zeroing);
+
+/* hypervisor glue */
+#include <hv/hypervisor.h>
+EXPORT_SYMBOL(hv_dev_open);
+EXPORT_SYMBOL(hv_dev_pread);
+EXPORT_SYMBOL(hv_dev_pwrite);
+EXPORT_SYMBOL(hv_dev_close);
+
+/* -ltile-cc */
+uint32_t __udivsi3(uint32_t dividend, uint32_t divisor);
+EXPORT_SYMBOL(__udivsi3);
+int32_t __divsi3(int32_t dividend, int32_t divisor);
+EXPORT_SYMBOL(__divsi3);
+uint64_t __udivdi3(uint64_t dividend, uint64_t divisor);
+EXPORT_SYMBOL(__udivdi3);
+int64_t __divdi3(int64_t dividend, int64_t divisor);
+EXPORT_SYMBOL(__divdi3);
+uint32_t __umodsi3(uint32_t dividend, uint32_t divisor);
+EXPORT_SYMBOL(__umodsi3);
+int32_t __modsi3(int32_t dividend, int32_t divisor);
+EXPORT_SYMBOL(__modsi3);
+uint64_t __umoddi3(uint64_t dividend, uint64_t divisor);
+EXPORT_SYMBOL(__umoddi3);
+int64_t __moddi3(int64_t dividend, int64_t divisor);
+EXPORT_SYMBOL(__moddi3);
+#ifndef __tilegx__
+uint64_t __ll_mul(uint64_t n0, uint64_t n1);
+EXPORT_SYMBOL(__ll_mul);
+#endif
+#ifndef __tilegx__
+int64_t __muldi3(int64_t, int64_t);
+EXPORT_SYMBOL(__muldi3);
+uint64_t __lshrdi3(uint64_t, unsigned int);
+EXPORT_SYMBOL(__lshrdi3);
+#endif
diff --git a/arch/tile/lib/mb_incoherent.S b/arch/tile/lib/mb_incoherent.S
new file mode 100644
index 0000000..989ad7b
--- /dev/null
+++ b/arch/tile/lib/mb_incoherent.S
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ * Assembly code for invoking the HV's fence_incoherent syscall.
+ */
+
+#include <linux/linkage.h>
+#include <hv/syscall_public.h>
+#include <arch/abi.h>
+#include <arch/chip.h>
+
+#if !CHIP_HAS_MF_WAITS_FOR_VICTIMS()
+
+/*
+ * Invoke the hypervisor's fence_incoherent syscall, which guarantees
+ * that all victims for cachelines homed on this tile have reached memory.
+ */
+STD_ENTRY(__mb_incoherent)
+	moveli TREG_SYSCALL_NR_NAME, HV_SYS_fence_incoherent
+	swint2
+	jrp lr
+	STD_ENDPROC(__mb_incoherent)
+
+#endif
diff --git a/arch/tile/lib/memchr_32.c b/arch/tile/lib/memchr_32.c
new file mode 100644
index 0000000..6235283
--- /dev/null
+++ b/arch/tile/lib/memchr_32.c
@@ -0,0 +1,68 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/module.h>
+
+void *memchr(const void *s, int c, size_t n)
+{
+	/* Get an aligned pointer. */
+	const uintptr_t s_int = (uintptr_t) s;
+	const uint32_t *p = (const uint32_t *)(s_int & -4);
+
+	/* Create four copies of the byte for which we are looking. */
+	const uint32_t goal = 0x01010101 * (uint8_t) c;
+
+	/* Read the first word, but munge it so that bytes before the array
+	 * will not match goal.
+	 *
+	 * Note that this shift count expression works because we know
+	 * shift counts are taken mod 32.
+	 */
+	const uint32_t before_mask = (1 << (s_int << 3)) - 1;
+	uint32_t v = (*p | before_mask) ^ (goal & before_mask);
+
+	/* Compute the address of the last byte. */
+	const char *const last_byte_ptr = (const char *)s + n - 1;
+
+	/* Compute the address of the word containing the last byte. */
+	const uint32_t *const last_word_ptr =
+	    (const uint32_t *)((uintptr_t) last_byte_ptr & -4);
+
+	uint32_t bits;
+	char *ret;
+
+	if (__builtin_expect(n == 0, 0)) {
+		/* Don't dereference any memory if the array is empty. */
+		return NULL;
+	}
+
+	while ((bits = __insn_seqb(v, goal)) == 0) {
+		if (__builtin_expect(p == last_word_ptr, 0)) {
+			/* We already read the last word in the array,
+			 * so give up.
+			 */
+			return NULL;
+		}
+		v = *++p;
+	}
+
+	/* We found a match, but it might be in a byte past the end
+	 * of the array.
+	 */
+	ret = ((char *)p) + (__insn_ctz(bits) >> 3);
+	return (ret <= last_byte_ptr) ? ret : NULL;
+}
+EXPORT_SYMBOL(memchr);
diff --git a/arch/tile/lib/memcpy_32.S b/arch/tile/lib/memcpy_32.S
new file mode 100644
index 0000000..f92984b
--- /dev/null
+++ b/arch/tile/lib/memcpy_32.S
@@ -0,0 +1,628 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ * This file shares the implementation of the userspace memcpy and
+ * the kernel's memcpy, copy_to_user and copy_from_user.
+ */
+
+#include <arch/chip.h>
+
+#if CHIP_HAS_WH64() || defined(MEMCPY_TEST_WH64)
+#define MEMCPY_USE_WH64
+#endif
+
+
+#include <linux/linkage.h>
+
+/* On TILE64, we wrap these functions via arch/tile/lib/memcpy_tile64.c */
+#if !CHIP_HAS_COHERENT_LOCAL_CACHE()
+#define memcpy __memcpy_asm
+#define __copy_to_user_inatomic __copy_to_user_inatomic_asm
+#define __copy_from_user_inatomic __copy_from_user_inatomic_asm
+#define __copy_from_user_zeroing __copy_from_user_zeroing_asm
+#endif
+
+#define IS_MEMCPY	  0
+#define IS_COPY_FROM_USER  1
+#define IS_COPY_FROM_USER_ZEROING  2
+#define IS_COPY_TO_USER   -1
+
+	.section .text.memcpy_common, "ax"
+	.align 64
+
+/* Use this to preface each bundle that can cause an exception so
+ * the kernel can clean up properly. The special cleanup code should
+ * not use these, since it knows what it is doing.
+ */
+#define EX \
+	.pushsection __ex_table, "a"; \
+	.word 9f, memcpy_common_fixup; \
+	.popsection; \
+	9
+
+
+/* __copy_from_user_inatomic takes the kernel target address in r0,
+ * the user source in r1, and the bytes to copy in r2.
+ * It returns the number of uncopiable bytes (hopefully zero) in r0.
+ */
+ENTRY(__copy_from_user_inatomic)
+.type __copy_from_user_inatomic, @function
+        FEEDBACK_ENTER_EXPLICIT(__copy_from_user_inatomic, \
+	  .text.memcpy_common, \
+          .Lend_memcpy_common - __copy_from_user_inatomic)
+	{ movei r29, IS_COPY_FROM_USER; j memcpy_common }
+	.size __copy_from_user_inatomic, . - __copy_from_user_inatomic
+
+/* __copy_from_user_zeroing is like __copy_from_user_inatomic, but
+ * any uncopiable bytes are zeroed in the target.
+ */
+ENTRY(__copy_from_user_zeroing)
+.type __copy_from_user_zeroing, @function
+        FEEDBACK_REENTER(__copy_from_user_inatomic)
+	{ movei r29, IS_COPY_FROM_USER_ZEROING; j memcpy_common }
+	.size __copy_from_user_zeroing, . - __copy_from_user_zeroing
+
+/* __copy_to_user_inatomic takes the user target address in r0,
+ * the kernel source in r1, and the bytes to copy in r2.
+ * It returns the number of uncopiable bytes (hopefully zero) in r0.
+ */
+ENTRY(__copy_to_user_inatomic)
+.type __copy_to_user_inatomic, @function
+        FEEDBACK_REENTER(__copy_from_user_inatomic)
+	{ movei r29, IS_COPY_TO_USER; j memcpy_common }
+	.size __copy_to_user_inatomic, . - __copy_to_user_inatomic
+
+ENTRY(memcpy)
+.type memcpy, @function
+        FEEDBACK_REENTER(__copy_from_user_inatomic)
+	{ movei r29, IS_MEMCPY }
+	.size memcpy, . - memcpy
+	/* Fall through */
+
+	.type memcpy_common, @function
+memcpy_common:
+	/* On entry, r29 holds one of the IS_* macro values from above. */
+
+
+	/* r0 is the dest, r1 is the source, r2 is the size. */
+
+	/* Save aside original dest so we can return it at the end. */
+	{ sw sp, lr; move r23, r0; or r4, r0, r1 }
+
+	/* Check for an empty size. */
+	{ bz r2, .Ldone; andi r4, r4, 3 }
+
+	/* Save aside original values in case of a fault. */
+	{ move r24, r1; move r25, r2 }
+	move r27, lr
+
+	/* Check for an unaligned source or dest. */
+	{ bnz r4, .Lcopy_unaligned_maybe_many; addli r4, r2, -256 }
+
+.Lcheck_aligned_copy_size:
+	/* If we are copying < 256 bytes, branch to simple case. */
+	{ blzt r4, .Lcopy_8_check; slti_u r8, r2, 8 }
+
+	/* Copying >= 256 bytes, so jump to complex prefetching loop. */
+	{ andi r6, r1, 63; j .Lcopy_many }
+
+/*
+ *
+ * Aligned 4 byte at a time copy loop
+ *
+ */
+
+.Lcopy_8_loop:
+	/* Copy two words at a time to hide load latency. */
+EX:	{ lw r3, r1; addi r1, r1, 4; slti_u r8, r2, 16 }
+EX:	{ lw r4, r1; addi r1, r1, 4 }
+EX:	{ sw r0, r3; addi r0, r0, 4; addi r2, r2, -4 }
+EX:	{ sw r0, r4; addi r0, r0, 4; addi r2, r2, -4 }
+.Lcopy_8_check:
+	{ bzt r8, .Lcopy_8_loop; slti_u r4, r2, 4 }
+
+	/* Copy odd leftover word, if any. */
+	{ bnzt r4, .Lcheck_odd_stragglers }
+EX:	{ lw r3, r1; addi r1, r1, 4 }
+EX:	{ sw r0, r3; addi r0, r0, 4; addi r2, r2, -4 }
+
+.Lcheck_odd_stragglers:
+	{ bnz r2, .Lcopy_unaligned_few }
+
+.Ldone:
+	/* For memcpy return original dest address, else zero. */
+	{ mz r0, r29, r23; jrp lr }
+
+
+/*
+ *
+ * Prefetching multiple cache line copy handler (for large transfers).
+ *
+ */
+
+	/* Copy words until r1 is cache-line-aligned. */
+.Lalign_loop:
+EX:	{ lw r3, r1; addi r1, r1, 4 }
+	{ andi r6, r1, 63 }
+EX:	{ sw r0, r3; addi r0, r0, 4; addi r2, r2, -4 }
+.Lcopy_many:
+	{ bnzt r6, .Lalign_loop; addi r9, r0, 63 }
+
+	{ addi r3, r1, 60; andi r9, r9, -64 }
+
+#ifdef MEMCPY_USE_WH64
+        /* No need to prefetch dst, we'll just do the wh64
+         * right before we copy a line.
+	 */
+#endif
+
+EX:	{ lw r5, r3; addi r3, r3, 64; movei r4, 1 }
+        /* Intentionally stall for a few cycles to leave L2 cache alone. */
+        { bnzt zero, .; move r27, lr }
+EX:	{ lw r6, r3; addi r3, r3, 64 }
+        /* Intentionally stall for a few cycles to leave L2 cache alone. */
+        { bnzt zero, . }
+EX:	{ lw r7, r3; addi r3, r3, 64 }
+#ifndef MEMCPY_USE_WH64
+        /* Prefetch the dest */
+        /* Intentionally stall for a few cycles to leave L2 cache alone. */
+        { bnzt zero, . }
+        /* Use a real load to cause a TLB miss if necessary.  We aren't using
+         * r28, so this should be fine.
+         */
+EX:	{ lw r28, r9; addi r9, r9, 64 }
+        /* Intentionally stall for a few cycles to leave L2 cache alone. */
+        { bnzt zero, . }
+        { prefetch r9; addi r9, r9, 64 }
+        /* Intentionally stall for a few cycles to leave L2 cache alone. */
+        { bnzt zero, . }
+        { prefetch r9; addi r9, r9, 64 }
+#endif
+        /* Intentionally stall for a few cycles to leave L2 cache alone. */
+        { bz zero, .Lbig_loop2 }
+
+	/* On entry to this loop:
+	 * - r0 points to the start of dst line 0
+	 * - r1 points to start of src line 0
+	 * - r2 >= (256 - 60), only the first time the loop trips.
+	 * - r3 contains r1 + 128 + 60    [pointer to end of source line 2]
+	 *   This is our prefetch address. When we get near the end
+	 *   rather than prefetching off the end this is changed to point
+	 *   to some "safe" recently loaded address.
+	 * - r5 contains *(r1 + 60)       [i.e. last word of source line 0]
+	 * - r6 contains *(r1 + 64 + 60)  [i.e. last word of source line 1]
+         * - r9 contains ((r0 + 63) & -64)
+	 *     [start of next dst cache line.]
+	 */
+
+.Lbig_loop:
+	{ jal .Lcopy_line2; add r15, r1, r2 }
+
+.Lbig_loop2:
+	/* Copy line 0, first stalling until r5 is ready. */
+EX:	{ move r12, r5; lw r16, r1 }
+	{ bz r4, .Lcopy_8_check; slti_u r8, r2, 8 }
+        /* Prefetch several lines ahead. */
+EX:	{ lw r5, r3; addi r3, r3, 64 }
+        { jal .Lcopy_line }
+
+	/* Copy line 1, first stalling until r6 is ready. */
+EX:	{ move r12, r6; lw r16, r1 }
+	{ bz r4, .Lcopy_8_check; slti_u r8, r2, 8 }
+        /* Prefetch several lines ahead. */
+EX:	{ lw r6, r3; addi r3, r3, 64 }
+	{ jal .Lcopy_line }
+
+	/* Copy line 2, first stalling until r7 is ready. */
+EX:	{ move r12, r7; lw r16, r1 }
+	{ bz r4, .Lcopy_8_check; slti_u r8, r2, 8 }
+        /* Prefetch several lines ahead. */
+EX:	{ lw r7, r3; addi r3, r3, 64 }
+        /* Use up a caches-busy cycle by jumping back to the top of the
+         * loop. Might as well get it out of the way now.
+         */
+        { j .Lbig_loop }
+
+
+	/* On entry:
+	 * - r0 points to the destination line.
+	 * - r1 points to the source line.
+         * - r3 is the next prefetch address.
+	 * - r9 holds the last address used for wh64.
+	 * - r12 = WORD_15
+         * - r16 = WORD_0.
+         * - r17 == r1 + 16.
+         * - r27 holds saved lr to restore.
+	 *
+	 * On exit:
+	 * - r0 is incremented by 64.
+	 * - r1 is incremented by 64, unless that would point to a word
+         *   beyond the end of the source array, in which case it is redirected
+         *   to point to an arbitrary word already in the cache.
+	 * - r2 is decremented by 64.
+         * - r3 is unchanged, unless it points to a word beyond the
+         *   end of the source array, in which case it is redirected
+         *   to point to an arbitrary word already in the cache.
+         *   Redirecting is OK since if we are that close to the end
+         *   of the array we will not come back to this subroutine
+         *   and use the contents of the prefetched address.
+	 * - r4 is nonzero iff r2 >= 64.
+         * - r9 is incremented by 64, unless it points beyond the
+         *   end of the last full destination cache line, in which
+         *   case it is redirected to a "safe address" that can be
+         *   clobbered (sp - 64)
+	 * - lr contains the value in r27.
+	 */
+
+/* r26 unused */
+
+.Lcopy_line:
+        /* TODO: when r3 goes past the end, we would like to redirect it
+         * to prefetch the last partial cache line (if any) just once, for the
+         * benefit of the final cleanup loop. But we don't want to
+         * prefetch that line more than once, or subsequent prefetches
+         * will go into the RTF. But then .Lbig_loop should unconditionally
+         * branch to top of loop to execute final prefetch, and its
+         * nop should become a conditional branch.
+         */
+
+        /* We need two non-memory cycles here to cover the resources
+         * used by the loads initiated by the caller.
+         */
+        { add r15, r1, r2 }
+.Lcopy_line2:
+        { slt_u r13, r3, r15; addi r17, r1, 16 }
+
+        /* NOTE: this will stall for one cycle as L1 is busy. */
+
+        /* Fill second L1D line. */
+EX:	{ lw r17, r17; addi r1, r1, 48; mvz r3, r13, r1 } /* r17 = WORD_4 */
+
+#ifdef MEMCPY_TEST_WH64
+        /* Issue a fake wh64 that clobbers the destination words
+         * with random garbage, for testing.
+         */
+	{ movei r19, 64; crc32_32 r10, r2, r9 }
+.Lwh64_test_loop:
+EX:	{ sw r9, r10; addi r9, r9, 4; addi r19, r19, -4 }
+        { bnzt r19, .Lwh64_test_loop; crc32_32 r10, r10, r19 }
+#elif CHIP_HAS_WH64()
+        /* Prepare destination line for writing. */
+EX:	{ wh64 r9; addi r9, r9, 64 }
+#else
+        /* Prefetch dest line */
+	{ prefetch r9; addi r9, r9, 64 }
+#endif
+        /* Load seven words that are L1D hits to cover wh64 L2 usage. */
+
+        /* Load the three remaining words from the last L1D line, which
+         * we know has already filled the L1D.
+         */
+EX:	{ lw r4, r1;  addi r1, r1, 4;   addi r20, r1, 16 }   /* r4 = WORD_12 */
+EX:	{ lw r8, r1;  addi r1, r1, 4;   slt_u r13, r20, r15 }/* r8 = WORD_13 */
+EX:	{ lw r11, r1; addi r1, r1, -52; mvz r20, r13, r1 }  /* r11 = WORD_14 */
+
+        /* Load the three remaining words from the first L1D line, first
+         * stalling until it has filled by "looking at" r16.
+         */
+EX:	{ lw r13, r1; addi r1, r1, 4; move zero, r16 }   /* r13 = WORD_1 */
+EX:	{ lw r14, r1; addi r1, r1, 4 }                   /* r14 = WORD_2 */
+EX:	{ lw r15, r1; addi r1, r1, 8; addi r10, r0, 60 } /* r15 = WORD_3 */
+
+        /* Load second word from the second L1D line, first
+         * stalling until it has filled by "looking at" r17.
+         */
+EX:	{ lw r19, r1; addi r1, r1, 4; move zero, r17 }  /* r19 = WORD_5 */
+
+        /* Store last word to the destination line, potentially dirtying it
+         * for the first time, which keeps the L2 busy for two cycles.
+         */
+EX:	{ sw r10, r12 }                                 /* store(WORD_15) */
+
+        /* Use two L1D hits to cover the sw L2 access above. */
+EX:	{ lw r10, r1; addi r1, r1, 4 }                  /* r10 = WORD_6 */
+EX:	{ lw r12, r1; addi r1, r1, 4 }                  /* r12 = WORD_7 */
+
+        /* Fill third L1D line. */
+EX:	{ lw r18, r1; addi r1, r1, 4 }                  /* r18 = WORD_8 */
+
+        /* Store first L1D line. */
+EX:	{ sw r0, r16; addi r0, r0, 4; add r16, r0, r2 } /* store(WORD_0) */
+EX:	{ sw r0, r13; addi r0, r0, 4; andi r16, r16, -64 } /* store(WORD_1) */
+EX:	{ sw r0, r14; addi r0, r0, 4; slt_u r16, r9, r16 } /* store(WORD_2) */
+#ifdef MEMCPY_USE_WH64
+EX:	{ sw r0, r15; addi r0, r0, 4; addi r13, sp, -64 } /* store(WORD_3) */
+#else
+        /* Back up the r9 to a cache line we are already storing to
+	 * if it gets past the end of the dest vector.  Strictly speaking,
+	 * we don't need to back up to the start of a cache line, but it's free
+	 * and tidy, so why not?
+         */
+EX:	{ sw r0, r15; addi r0, r0, 4; andi r13, r0, -64 } /* store(WORD_3) */
+#endif
+        /* Store second L1D line. */
+EX:	{ sw r0, r17; addi r0, r0, 4; mvz r9, r16, r13 }/* store(WORD_4) */
+EX:	{ sw r0, r19; addi r0, r0, 4 }                  /* store(WORD_5) */
+EX:	{ sw r0, r10; addi r0, r0, 4 }                  /* store(WORD_6) */
+EX:	{ sw r0, r12; addi r0, r0, 4 }                  /* store(WORD_7) */
+
+EX:	{ lw r13, r1; addi r1, r1, 4; move zero, r18 }  /* r13 = WORD_9 */
+EX:	{ lw r14, r1; addi r1, r1, 4 }                  /* r14 = WORD_10 */
+EX:	{ lw r15, r1; move r1, r20   }                  /* r15 = WORD_11 */
+
+        /* Store third L1D line. */
+EX:	{ sw r0, r18; addi r0, r0, 4 }                  /* store(WORD_8) */
+EX:	{ sw r0, r13; addi r0, r0, 4 }                  /* store(WORD_9) */
+EX:	{ sw r0, r14; addi r0, r0, 4 }                  /* store(WORD_10) */
+EX:	{ sw r0, r15; addi r0, r0, 4 }                  /* store(WORD_11) */
+
+        /* Store rest of fourth L1D line. */
+EX:	{ sw r0, r4;  addi r0, r0, 4 }                  /* store(WORD_12) */
+        {
+EX:	sw r0, r8                                       /* store(WORD_13) */
+        addi r0, r0, 4
+	/* Will r2 be > 64 after we subtract 64 below? */
+        shri r4, r2, 7
+        }
+        {
+EX:	sw r0, r11                                      /* store(WORD_14) */
+        addi r0, r0, 8
+        /* Record 64 bytes successfully copied. */
+        addi r2, r2, -64
+        }
+
+	{ jrp lr; move lr, r27 }
+
+        /* Convey to the backtrace library that the stack frame is size
+	 * zero, and the real return address is on the stack rather than
+	 * in 'lr'.
+	 */
+	{ info 8 }
+
+	.align 64
+.Lcopy_unaligned_maybe_many:
+	/* Skip the setup overhead if we aren't copying many bytes. */
+	{ slti_u r8, r2, 20; sub r4, zero, r0 }
+	{ bnzt r8, .Lcopy_unaligned_few; andi r4, r4, 3 }
+	{ bz r4, .Ldest_is_word_aligned; add r18, r1, r2 }
+
+/*
+ *
+ * unaligned 4 byte at a time copy handler.
+ *
+ */
+
+	/* Copy single bytes until r0 == 0 mod 4, so we can store words. */
+.Lalign_dest_loop:
+EX:	{ lb_u r3, r1; addi r1, r1, 1; addi r4, r4, -1 }
+EX:	{ sb r0, r3;   addi r0, r0, 1; addi r2, r2, -1 }
+	{ bnzt r4, .Lalign_dest_loop; andi r3, r1, 3 }
+
+	/* If source and dest are now *both* aligned, do an aligned copy. */
+	{ bz r3, .Lcheck_aligned_copy_size; addli r4, r2, -256 }
+
+.Ldest_is_word_aligned:
+
+#if CHIP_HAS_DWORD_ALIGN()
+EX:	{ andi r8, r0, 63; lwadd_na r6, r1, 4}
+	{ slti_u r9, r2, 64; bz r8, .Ldest_is_L2_line_aligned }
+
+	/* This copies unaligned words until either there are fewer
+	 * than 4 bytes left to copy, or until the destination pointer
+	 * is cache-aligned, whichever comes first.
+	 *
+	 * On entry:
+	 * - r0 is the next store address.
+	 * - r1 points 4 bytes past the load address corresponding to r0.
+	 * - r2 >= 4
+	 * - r6 is the next aligned word loaded.
+	 */
+.Lcopy_unaligned_src_words:
+EX:	{ lwadd_na r7, r1, 4; slti_u r8, r2, 4 + 4 }
+	/* stall */
+	{ dword_align r6, r7, r1; slti_u r9, r2, 64 + 4 }
+EX:	{ swadd r0, r6, 4; addi r2, r2, -4 }
+	{ bnz r8, .Lcleanup_unaligned_words; andi r8, r0, 63 }
+	{ bnzt r8, .Lcopy_unaligned_src_words; move r6, r7 }
+
+	/* On entry:
+	 * - r0 is the next store address.
+	 * - r1 points 4 bytes past the load address corresponding to r0.
+	 * - r2 >= 4 (# of bytes left to store).
+	 * - r6 is the next aligned src word value.
+	 * - r9 = (r2 < 64U).
+	 * - r18 points one byte past the end of source memory.
+	 */
+.Ldest_is_L2_line_aligned:
+
+	{
+	/* Not a full cache line remains. */
+	bnz r9, .Lcleanup_unaligned_words
+	move r7, r6
+	}
+
+	/* r2 >= 64 */
+
+	/* Kick off two prefetches, but don't go past the end. */
+	{ addi r3, r1, 63 - 4; addi r8, r1, 64 + 63 - 4 }
+	{ prefetch r3; move r3, r8; slt_u r8, r8, r18 }
+	{ mvz r3, r8, r1; addi r8, r3, 64 }
+	{ prefetch r3; move r3, r8; slt_u r8, r8, r18 }
+	{ mvz r3, r8, r1; movei r17, 0 }
+
+.Lcopy_unaligned_line:
+	/* Prefetch another line. */
+	{ prefetch r3; addi r15, r1, 60; addi r3, r3, 64 }
+	/* Fire off a load of the last word we are about to copy. */
+EX:	{ lw_na r15, r15; slt_u r8, r3, r18 }
+
+EX:	{ mvz r3, r8, r1; wh64 r0 }
+
+	/* This loop runs twice.
+	 *
+	 * On entry:
+	 * - r17 is even before the first iteration, and odd before
+	 *   the second.  It is incremented inside the loop.  Encountering
+	 *   an even value at the end of the loop makes it stop.
+	 */
+.Lcopy_half_an_unaligned_line:
+EX:	{
+	/* Stall until the last byte is ready. In the steady state this
+	 * guarantees all words to load below will be in the L2 cache, which
+	 * avoids shunting the loads to the RTF.
+	 */
+	move zero, r15
+	lwadd_na r7, r1, 16
+	}
+EX:	{ lwadd_na r11, r1, 12 }
+EX:	{ lwadd_na r14, r1, -24 }
+EX:	{ lwadd_na r8, r1, 4 }
+EX:	{ lwadd_na r9, r1, 4 }
+EX:	{
+	lwadd_na r10, r1, 8
+	/* r16 = (r2 < 64), after we subtract 32 from r2 below. */
+	slti_u r16, r2, 64 + 32
+	}
+EX:	{ lwadd_na r12, r1, 4; addi r17, r17, 1 }
+EX:	{ lwadd_na r13, r1, 8; dword_align r6, r7, r1 }
+EX:	{ swadd r0, r6,  4; dword_align r7,  r8,  r1 }
+EX:	{ swadd r0, r7,  4; dword_align r8,  r9,  r1 }
+EX:	{ swadd r0, r8,  4; dword_align r9,  r10, r1 }
+EX:	{ swadd r0, r9,  4; dword_align r10, r11, r1 }
+EX:	{ swadd r0, r10, 4; dword_align r11, r12, r1 }
+EX:	{ swadd r0, r11, 4; dword_align r12, r13, r1 }
+EX:	{ swadd r0, r12, 4; dword_align r13, r14, r1 }
+EX:	{ swadd r0, r13, 4; addi r2, r2, -32 }
+	{ move r6, r14; bbst r17, .Lcopy_half_an_unaligned_line }
+
+	{ bzt r16, .Lcopy_unaligned_line; move r7, r6 }
+
+	/* On entry:
+	 * - r0 is the next store address.
+	 * - r1 points 4 bytes past the load address corresponding to r0.
+	 * - r2 >= 0 (# of bytes left to store).
+	 * - r7 is the next aligned src word value.
+	 */
+.Lcleanup_unaligned_words:
+	/* Handle any trailing bytes. */
+	{ bz r2, .Lcopy_unaligned_done; slti_u r8, r2, 4 }
+	{ bzt r8, .Lcopy_unaligned_src_words; move r6, r7 }
+
+	/* Move r1 back to the point where it corresponds to r0. */
+	{ addi r1, r1, -4 }
+
+#else /* !CHIP_HAS_DWORD_ALIGN() */
+
+	/* Compute right/left shift counts and load initial source words. */
+	{ andi r5, r1, -4; andi r3, r1, 3 }
+EX:	{ lw r6, r5; addi r5, r5, 4; shli r3, r3, 3 }
+EX:	{ lw r7, r5; addi r5, r5, 4; sub r4, zero, r3 }
+
+	/* Load and store one word at a time, using shifts and ORs
+	 * to correct for the misaligned src.
+	 */
+.Lcopy_unaligned_src_loop:
+	{ shr r6, r6, r3; shl r8, r7, r4 }
+EX:	{ lw r7, r5; or r8, r8, r6; move r6, r7 }
+EX:	{ sw r0, r8; addi r0, r0, 4; addi r2, r2, -4 }
+	{ addi r5, r5, 4; slti_u r8, r2, 8 }
+	{ bzt r8, .Lcopy_unaligned_src_loop; addi r1, r1, 4 }
+
+	{ bz r2, .Lcopy_unaligned_done }
+#endif /* !CHIP_HAS_DWORD_ALIGN() */
+
+	/* Fall through */
+
+/*
+ *
+ * 1 byte at a time copy handler.
+ *
+ */
+
+.Lcopy_unaligned_few:
+EX:	{ lb_u r3, r1; addi r1, r1, 1 }
+EX:	{ sb r0, r3;   addi r0, r0, 1; addi r2, r2, -1 }
+	{ bnzt r2, .Lcopy_unaligned_few }
+
+.Lcopy_unaligned_done:
+
+	/* For memcpy return original dest address, else zero. */
+	{ mz r0, r29, r23; jrp lr }
+
+.Lend_memcpy_common:
+	.size memcpy_common, .Lend_memcpy_common - memcpy_common
+
+	.section .fixup,"ax"
+memcpy_common_fixup:
+	.type memcpy_common_fixup, @function
+
+	/* Skip any bytes we already successfully copied.
+	 * r2 (num remaining) is correct, but r0 (dst) and r1 (src)
+	 * may not be quite right because of unrolling and prefetching.
+	 * So we need to recompute their values as the address just
+	 * after the last byte we are sure was successfully loaded and
+	 * then stored.
+	 */
+
+	/* Determine how many bytes we successfully copied. */
+	{ sub r3, r25, r2 }
+
+	/* Add this to the original r0 and r1 to get their new values. */
+	{ add r0, r23, r3; add r1, r24, r3 }
+
+	{ bzt r29, memcpy_fixup_loop }
+	{ blzt r29, copy_to_user_fixup_loop }
+
+copy_from_user_fixup_loop:
+	/* Try copying the rest one byte at a time, expecting a load fault. */
+.Lcfu:	{ lb_u r3, r1; addi r1, r1, 1 }
+	{ sb r0, r3; addi r0, r0, 1; addi r2, r2, -1 }
+	{ bnzt r2, copy_from_user_fixup_loop }
+
+.Lcopy_from_user_fixup_zero_remainder:
+	{ bbs r29, 2f }  /* low bit set means IS_COPY_FROM_USER */
+	/* byte-at-a-time loop faulted, so zero the rest. */
+	{ move r3, r2; bz r2, 2f /* should be impossible, but handle it. */ }
+1:      { sb r0, zero; addi r0, r0, 1; addi r3, r3, -1 }
+	{ bnzt r3, 1b }
+2:	move lr, r27
+	{ move r0, r2; jrp lr }
+
+copy_to_user_fixup_loop:
+	/* Try copying the rest one byte at a time, expecting a store fault. */
+	{ lb_u r3, r1; addi r1, r1, 1 }
+.Lctu:	{ sb r0, r3; addi r0, r0, 1; addi r2, r2, -1 }
+	{ bnzt r2, copy_to_user_fixup_loop }
+.Lcopy_to_user_fixup_done:
+	move lr, r27
+	{ move r0, r2; jrp lr }
+
+memcpy_fixup_loop:
+	/* Try copying the rest one byte at a time. We expect a disastrous
+	 * fault to happen since we are in fixup code, but let it happen.
+	 */
+	{ lb_u r3, r1; addi r1, r1, 1 }
+	{ sb r0, r3; addi r0, r0, 1; addi r2, r2, -1 }
+	{ bnzt r2, memcpy_fixup_loop }
+	/* This should be unreachable, we should have faulted again.
+	 * But be paranoid and handle it in case some interrupt changed
+	 * the TLB or something.
+	 */
+	move lr, r27
+	{ move r0, r23; jrp lr }
+
+	.size memcpy_common_fixup, . - memcpy_common_fixup
+
+	.section __ex_table,"a"
+	.word .Lcfu, .Lcopy_from_user_fixup_zero_remainder
+	.word .Lctu, .Lcopy_to_user_fixup_done
diff --git a/arch/tile/lib/memcpy_tile64.c b/arch/tile/lib/memcpy_tile64.c
new file mode 100644
index 0000000..4f00473
--- /dev/null
+++ b/arch/tile/lib/memcpy_tile64.c
@@ -0,0 +1,271 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#include <linux/string.h>
+#include <linux/smp.h>
+#include <linux/module.h>
+#include <linux/uaccess.h>
+#include <asm/fixmap.h>
+#include <asm/kmap_types.h>
+#include <asm/tlbflush.h>
+#include <hv/hypervisor.h>
+#include <arch/chip.h>
+
+
+#if !CHIP_HAS_COHERENT_LOCAL_CACHE()
+
+/* Defined in memcpy.S */
+extern unsigned long __memcpy_asm(void *to, const void *from, unsigned long n);
+extern unsigned long __copy_to_user_inatomic_asm(
+	void __user *to, const void *from, unsigned long n);
+extern unsigned long __copy_from_user_inatomic_asm(
+	void *to, const void __user *from, unsigned long n);
+extern unsigned long __copy_from_user_zeroing_asm(
+	void *to, const void __user *from, unsigned long n);
+
+typedef unsigned long (*memcpy_t)(void *, const void *, unsigned long);
+
+/* Size above which to consider TLB games for performance */
+#define LARGE_COPY_CUTOFF 2048
+
+/* Communicate to the simulator what we are trying to do. */
+#define sim_allow_multiple_caching(b) \
+  __insn_mtspr(SPR_SIM_CONTROL, \
+   SIM_CONTROL_ALLOW_MULTIPLE_CACHING | ((b) << _SIM_CONTROL_OPERATOR_BITS))
+
+/*
+ * Copy memory by briefly enabling incoherent cacheline-at-a-time mode.
+ *
+ * We set up our own source and destination PTEs that we fully control.
+ * This is the only way to guarantee that we don't race with another
+ * thread that is modifying the PTE; we can't afford to try the
+ * copy_{to,from}_user() technique of catching the interrupt, since
+ * we must run with interrupts disabled to avoid the risk of some
+ * other code seeing the incoherent data in our cache.  (Recall that
+ * our cache is indexed by PA, so even if the other code doesn't use
+ * our KM_MEMCPY virtual addresses, they'll still hit in cache using
+ * the normal VAs that aren't supposed to hit in cache.)
+ */
+static void memcpy_multicache(void *dest, const void *source,
+			      pte_t dst_pte, pte_t src_pte, int len)
+{
+	int idx, i;
+	unsigned long flags, newsrc, newdst, endsrc;
+	pmd_t *pmdp;
+	pte_t *ptep;
+	int cpu = get_cpu();
+
+	/*
+	 * Disable interrupts so that we don't recurse into memcpy()
+	 * in an interrupt handler, nor accidentally reference
+	 * the PA of the source from an interrupt routine.  Also
+	 * notify the simulator that we're playing games so we don't
+	 * generate spurious coherency warnings.
+	 */
+	local_irq_save(flags);
+	sim_allow_multiple_caching(1);
+
+	/* Set up the new dest mapping */
+	idx = FIX_KMAP_BEGIN + (KM_TYPE_NR * cpu) + KM_MEMCPY0;
+	newdst = __fix_to_virt(idx) + ((unsigned long)dest & (PAGE_SIZE-1));
+	pmdp = pmd_offset(pud_offset(pgd_offset_k(newdst), newdst), newdst);
+	ptep = pte_offset_kernel(pmdp, newdst);
+	if (pte_val(*ptep) != pte_val(dst_pte)) {
+		set_pte(ptep, dst_pte);
+		local_flush_tlb_page(NULL, newdst, PAGE_SIZE);
+	}
+
+	/* Set up the new source mapping */
+	idx += (KM_MEMCPY0 - KM_MEMCPY1);
+	src_pte = hv_pte_set_nc(src_pte);
+	src_pte = hv_pte_clear_writable(src_pte);  /* be paranoid */
+	newsrc = __fix_to_virt(idx) + ((unsigned long)source & (PAGE_SIZE-1));
+	pmdp = pmd_offset(pud_offset(pgd_offset_k(newsrc), newsrc), newsrc);
+	ptep = pte_offset_kernel(pmdp, newsrc);
+	*ptep = src_pte;   /* set_pte() would be confused by this */
+	local_flush_tlb_page(NULL, newsrc, PAGE_SIZE);
+
+	/* Actually move the data. */
+	__memcpy_asm((void *)newdst, (const void *)newsrc, len);
+
+	/*
+	 * Remap the source as locally-cached and not OLOC'ed so that
+	 * we can inval without also invaling the remote cpu's cache.
+	 * This also avoids known errata with inv'ing cacheable oloc data.
+	 */
+	src_pte = hv_pte_set_mode(src_pte, HV_PTE_MODE_CACHE_NO_L3);
+	src_pte = hv_pte_set_writable(src_pte); /* need write access for inv */
+	*ptep = src_pte;   /* set_pte() would be confused by this */
+	local_flush_tlb_page(NULL, newsrc, PAGE_SIZE);
+
+	/*
+	 * Do the actual invalidation, covering the full L2 cache line
+	 * at the end since __memcpy_asm() is somewhat aggressive.
+	 */
+	__inv_buffer((void *)newsrc, len);
+
+	/*
+	 * We're done: notify the simulator that all is back to normal,
+	 * and re-enable interrupts and pre-emption.
+	 */
+	sim_allow_multiple_caching(0);
+	local_irq_restore(flags);
+	put_cpu_no_resched();
+}
+
+/*
+ * Identify large copies from remotely-cached memory, and copy them
+ * via memcpy_multicache() if they look good, otherwise fall back
+ * to the particular kind of copying passed as the memcpy_t function.
+ */
+static unsigned long fast_copy(void *dest, const void *source, int len,
+			       memcpy_t func)
+{
+	/*
+	 * Check if it's big enough to bother with.  We may end up doing a
+	 * small copy via TLB manipulation if we're near a page boundary,
+	 * but presumably we'll make it up when we hit the second page.
+	 */
+	while (len >= LARGE_COPY_CUTOFF) {
+		int copy_size, bytes_left_on_page;
+		pte_t *src_ptep, *dst_ptep;
+		pte_t src_pte, dst_pte;
+		struct page *src_page, *dst_page;
+
+		/* Is the source page oloc'ed to a remote cpu? */
+retry_source:
+		src_ptep = virt_to_pte(current->mm, (unsigned long)source);
+		if (src_ptep == NULL)
+			break;
+		src_pte = *src_ptep;
+		if (!hv_pte_get_present(src_pte) ||
+		    !hv_pte_get_readable(src_pte) ||
+		    hv_pte_get_mode(src_pte) != HV_PTE_MODE_CACHE_TILE_L3)
+			break;
+		if (get_remote_cache_cpu(src_pte) == smp_processor_id())
+			break;
+		src_page = pfn_to_page(hv_pte_get_pfn(src_pte));
+		get_page(src_page);
+		if (pte_val(src_pte) != pte_val(*src_ptep)) {
+			put_page(src_page);
+			goto retry_source;
+		}
+		if (pte_huge(src_pte)) {
+			/* Adjust the PTE to correspond to a small page */
+			int pfn = hv_pte_get_pfn(src_pte);
+			pfn += (((unsigned long)source & (HPAGE_SIZE-1))
+				>> PAGE_SHIFT);
+			src_pte = pfn_pte(pfn, src_pte);
+			src_pte = pte_mksmall(src_pte);
+		}
+
+		/* Is the destination page writable? */
+retry_dest:
+		dst_ptep = virt_to_pte(current->mm, (unsigned long)dest);
+		if (dst_ptep == NULL) {
+			put_page(src_page);
+			break;
+		}
+		dst_pte = *dst_ptep;
+		if (!hv_pte_get_present(dst_pte) ||
+		    !hv_pte_get_writable(dst_pte)) {
+			put_page(src_page);
+			break;
+		}
+		dst_page = pfn_to_page(hv_pte_get_pfn(dst_pte));
+		if (dst_page == src_page) {
+			/*
+			 * Source and dest are on the same page; this
+			 * potentially exposes us to incoherence if any
+			 * part of src and dest overlap on a cache line.
+			 * Just give up rather than trying to be precise.
+			 */
+			put_page(src_page);
+			break;
+		}
+		get_page(dst_page);
+		if (pte_val(dst_pte) != pte_val(*dst_ptep)) {
+			put_page(dst_page);
+			goto retry_dest;
+		}
+		if (pte_huge(dst_pte)) {
+			/* Adjust the PTE to correspond to a small page */
+			int pfn = hv_pte_get_pfn(dst_pte);
+			pfn += (((unsigned long)dest & (HPAGE_SIZE-1))
+				>> PAGE_SHIFT);
+			dst_pte = pfn_pte(pfn, dst_pte);
+			dst_pte = pte_mksmall(dst_pte);
+		}
+
+		/* All looks good: create a cachable PTE and copy from it */
+		copy_size = len;
+		bytes_left_on_page =
+			PAGE_SIZE - (((int)source) & (PAGE_SIZE-1));
+		if (copy_size > bytes_left_on_page)
+			copy_size = bytes_left_on_page;
+		bytes_left_on_page =
+			PAGE_SIZE - (((int)dest) & (PAGE_SIZE-1));
+		if (copy_size > bytes_left_on_page)
+			copy_size = bytes_left_on_page;
+		memcpy_multicache(dest, source, dst_pte, src_pte, copy_size);
+
+		/* Release the pages */
+		put_page(dst_page);
+		put_page(src_page);
+
+		/* Continue on the next page */
+		dest += copy_size;
+		source += copy_size;
+		len -= copy_size;
+	}
+
+	return func(dest, source, len);
+}
+
+void *memcpy(void *to, const void *from, __kernel_size_t n)
+{
+	if (n < LARGE_COPY_CUTOFF)
+		return (void *)__memcpy_asm(to, from, n);
+	else
+		return (void *)fast_copy(to, from, n, __memcpy_asm);
+}
+
+unsigned long __copy_to_user_inatomic(void __user *to, const void *from,
+				      unsigned long n)
+{
+	if (n < LARGE_COPY_CUTOFF)
+		return __copy_to_user_inatomic_asm(to, from, n);
+	else
+		return fast_copy(to, from, n, __copy_to_user_inatomic_asm);
+}
+
+unsigned long __copy_from_user_inatomic(void *to, const void __user *from,
+					unsigned long n)
+{
+	if (n < LARGE_COPY_CUTOFF)
+		return __copy_from_user_inatomic_asm(to, from, n);
+	else
+		return fast_copy(to, from, n, __copy_from_user_inatomic_asm);
+}
+
+unsigned long __copy_from_user_zeroing(void *to, const void __user *from,
+				       unsigned long n)
+{
+	if (n < LARGE_COPY_CUTOFF)
+		return __copy_from_user_zeroing_asm(to, from, n);
+	else
+		return fast_copy(to, from, n, __copy_from_user_zeroing_asm);
+}
+
+#endif /* !CHIP_HAS_COHERENT_LOCAL_CACHE() */
diff --git a/arch/tile/lib/memmove_32.c b/arch/tile/lib/memmove_32.c
new file mode 100644
index 0000000..f09d8c4
--- /dev/null
+++ b/arch/tile/lib/memmove_32.c
@@ -0,0 +1,63 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/module.h>
+
+void *memmove(void *dest, const void *src, size_t n)
+{
+	if ((const char *)src >= (char *)dest + n
+	    || (char *)dest >= (const char *)src + n) {
+		/* We found no overlap, so let memcpy do all the heavy
+		 * lifting (prefetching, etc.)
+		 */
+		return memcpy(dest, src, n);
+	}
+
+	if (n != 0) {
+		const uint8_t *in;
+		uint8_t x;
+		uint8_t *out;
+		int stride;
+
+		if (src < dest) {
+			/* copy backwards */
+			in = (const uint8_t *)src + n - 1;
+			out = (uint8_t *)dest + n - 1;
+			stride = -1;
+		} else {
+			/* copy forwards */
+			in = (const uint8_t *)src;
+			out = (uint8_t *)dest;
+			stride = 1;
+                }
+
+		/* Manually software-pipeline this loop. */
+		x = *in;
+		in += stride;
+
+		while (--n != 0) {
+			*out = x;
+			out += stride;
+			x = *in;
+			in += stride;
+		}
+
+		*out = x;
+	}
+
+	return dest;
+}
+EXPORT_SYMBOL(memmove);
diff --git a/arch/tile/lib/memset_32.c b/arch/tile/lib/memset_32.c
new file mode 100644
index 0000000..8593bc82
--- /dev/null
+++ b/arch/tile/lib/memset_32.c
@@ -0,0 +1,274 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#include <arch/chip.h>
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/module.h>
+
+
+void *memset(void *s, int c, size_t n)
+{
+	uint32_t *out32;
+	int n32;
+	uint32_t v16, v32;
+	uint8_t *out8 = s;
+#if !CHIP_HAS_WH64()
+	int ahead32;
+#else
+	int to_align32;
+#endif
+
+	/* Experimentation shows that a trivial tight loop is a win up until
+	 * around a size of 20, where writing a word at a time starts to win.
+	 */
+#define BYTE_CUTOFF 20
+
+#if BYTE_CUTOFF < 3
+	/* This must be at least at least this big, or some code later
+	 * on doesn't work.
+	 */
+#error "BYTE_CUTOFF is too small"
+#endif
+
+	if (n < BYTE_CUTOFF) {
+		/* Strangely, this turns out to be the tightest way to
+		 * write this loop.
+		 */
+		if (n != 0) {
+			do {
+				/* Strangely, combining these into one line
+				 * performs worse.
+				 */
+				*out8 = c;
+				out8++;
+			} while (--n != 0);
+		}
+
+		return s;
+	}
+
+#if !CHIP_HAS_WH64()
+	/* Use a spare issue slot to start prefetching the first cache
+	 * line early. This instruction is free as the store can be buried
+	 * in otherwise idle issue slots doing ALU ops.
+	 */
+	__insn_prefetch(out8);
+
+	/* We prefetch the end so that a short memset that spans two cache
+	 * lines gets some prefetching benefit. Again we believe this is free
+	 * to issue.
+	 */
+	__insn_prefetch(&out8[n - 1]);
+#endif /* !CHIP_HAS_WH64() */
+
+
+	/* Align 'out8'. We know n >= 3 so this won't write past the end. */
+	while (((uintptr_t) out8 & 3) != 0) {
+		*out8++ = c;
+		--n;
+	}
+
+	/* Align 'n'. */
+	while (n & 3)
+		out8[--n] = c;
+
+	out32 = (uint32_t *) out8;
+	n32 = n >> 2;
+
+	/* Tile input byte out to 32 bits. */
+	v16 = __insn_intlb(c, c);
+	v32 = __insn_intlh(v16, v16);
+
+	/* This must be at least 8 or the following loop doesn't work. */
+#define CACHE_LINE_SIZE_IN_WORDS (CHIP_L2_LINE_SIZE() / 4)
+
+#if !CHIP_HAS_WH64()
+
+	ahead32 = CACHE_LINE_SIZE_IN_WORDS;
+
+	/* We already prefetched the first and last cache lines, so
+	 * we only need to do more prefetching if we are storing
+	 * to more than two cache lines.
+	 */
+	if (n32 > CACHE_LINE_SIZE_IN_WORDS * 2) {
+		int i;
+
+		/* Prefetch the next several cache lines.
+		 * This is the setup code for the software-pipelined
+		 * loop below.
+		 */
+#define MAX_PREFETCH 5
+		ahead32 = n32 & -CACHE_LINE_SIZE_IN_WORDS;
+		if (ahead32 > MAX_PREFETCH * CACHE_LINE_SIZE_IN_WORDS)
+			ahead32 = MAX_PREFETCH * CACHE_LINE_SIZE_IN_WORDS;
+
+		for (i = CACHE_LINE_SIZE_IN_WORDS;
+		     i < ahead32; i += CACHE_LINE_SIZE_IN_WORDS)
+			__insn_prefetch(&out32[i]);
+	}
+
+	if (n32 > ahead32) {
+		while (1) {
+			int j;
+
+			/* Prefetch by reading one word several cache lines
+			 * ahead.  Since loads are non-blocking this will
+			 * cause the full cache line to be read while we are
+			 * finishing earlier cache lines.  Using a store
+			 * here causes microarchitectural performance
+			 * problems where a victimizing store miss goes to
+			 * the head of the retry FIFO and locks the pipe for
+			 * a few cycles.  So a few subsequent stores in this
+			 * loop go into the retry FIFO, and then later
+			 * stores see other stores to the same cache line
+			 * are already in the retry FIFO and themselves go
+			 * into the retry FIFO, filling it up and grinding
+			 * to a halt waiting for the original miss to be
+			 * satisfied.
+			 */
+			__insn_prefetch(&out32[ahead32]);
+
+#if 1
+#if CACHE_LINE_SIZE_IN_WORDS % 4 != 0
+#error "Unhandled CACHE_LINE_SIZE_IN_WORDS"
+#endif
+
+			n32 -= CACHE_LINE_SIZE_IN_WORDS;
+
+			/* Save icache space by only partially unrolling
+			 * this loop.
+			 */
+			for (j = CACHE_LINE_SIZE_IN_WORDS / 4; j > 0; j--) {
+				*out32++ = v32;
+				*out32++ = v32;
+				*out32++ = v32;
+				*out32++ = v32;
+			}
+#else
+			/* Unfortunately, due to a code generator flaw this
+			 * allocates a separate register for each of these
+			 * stores, which requires a large number of spills,
+			 * which makes this procedure enormously bigger
+			 * (something like 70%)
+			 */
+			*out32++ = v32;
+			*out32++ = v32;
+			*out32++ = v32;
+			*out32++ = v32;
+			*out32++ = v32;
+			*out32++ = v32;
+			*out32++ = v32;
+			*out32++ = v32;
+			*out32++ = v32;
+			*out32++ = v32;
+			*out32++ = v32;
+			*out32++ = v32;
+			*out32++ = v32;
+			*out32++ = v32;
+			*out32++ = v32;
+			n32 -= 16;
+#endif
+
+			/* To save compiled code size, reuse this loop even
+			 * when we run out of prefetching to do by dropping
+			 * ahead32 down.
+			 */
+			if (n32 <= ahead32) {
+				/* Not even a full cache line left,
+				 * so stop now.
+				 */
+				if (n32 < CACHE_LINE_SIZE_IN_WORDS)
+					break;
+
+				/* Choose a small enough value that we don't
+				 * prefetch past the end.  There's no sense
+				 * in touching cache lines we don't have to.
+				 */
+				ahead32 = CACHE_LINE_SIZE_IN_WORDS - 1;
+			}
+		}
+	}
+
+#else /* CHIP_HAS_WH64() */
+
+	/* Determine how many words we need to emit before the 'out32'
+	 * pointer becomes aligned modulo the cache line size.
+	 */
+	to_align32 =
+		(-((uintptr_t)out32 >> 2)) & (CACHE_LINE_SIZE_IN_WORDS - 1);
+
+	/* Only bother aligning and using wh64 if there is at least
+	 * one full cache line to process.  This check also prevents
+	 * overrunning the end of the buffer with alignment words.
+	 */
+	if (to_align32 <= n32 - CACHE_LINE_SIZE_IN_WORDS) {
+		int lines_left;
+
+		/* Align out32 mod the cache line size so we can use wh64. */
+		n32 -= to_align32;
+		for (; to_align32 != 0; to_align32--) {
+			*out32 = v32;
+			out32++;
+		}
+
+		/* Use unsigned divide to turn this into a right shift. */
+		lines_left = (unsigned)n32 / CACHE_LINE_SIZE_IN_WORDS;
+
+		do {
+			/* Only wh64 a few lines at a time, so we don't
+			 * exceed the maximum number of victim lines.
+			 */
+			int x = ((lines_left < CHIP_MAX_OUTSTANDING_VICTIMS())
+				  ? lines_left
+				  : CHIP_MAX_OUTSTANDING_VICTIMS());
+			uint32_t *wh = out32;
+			int i = x;
+			int j;
+
+			lines_left -= x;
+
+			do {
+				__insn_wh64(wh);
+				wh += CACHE_LINE_SIZE_IN_WORDS;
+			} while (--i);
+
+			for (j = x * (CACHE_LINE_SIZE_IN_WORDS / 4); j != 0; j--) {
+				*out32++ = v32;
+				*out32++ = v32;
+				*out32++ = v32;
+				*out32++ = v32;
+			}
+		} while (lines_left != 0);
+
+		/* We processed all full lines above, so only this many
+		 * words remain to be processed.
+		 */
+		n32 &= CACHE_LINE_SIZE_IN_WORDS - 1;
+	}
+
+#endif /* CHIP_HAS_WH64() */
+
+	/* Now handle any leftover values. */
+	if (n32 != 0) {
+		do {
+			*out32 = v32;
+			out32++;
+		} while (--n32 != 0);
+	}
+
+	return s;
+}
+EXPORT_SYMBOL(memset);
diff --git a/arch/tile/lib/spinlock_32.c b/arch/tile/lib/spinlock_32.c
new file mode 100644
index 0000000..485e24d
--- /dev/null
+++ b/arch/tile/lib/spinlock_32.c
@@ -0,0 +1,221 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <asm/processor.h>
+
+#include "spinlock_common.h"
+
+void arch_spin_lock(arch_spinlock_t *lock)
+{
+	int my_ticket;
+	int iterations = 0;
+	int delta;
+
+	while ((my_ticket = __insn_tns((void *)&lock->next_ticket)) & 1)
+		delay_backoff(iterations++);
+
+	/* Increment the next ticket number, implicitly releasing tns lock. */
+	lock->next_ticket = my_ticket + TICKET_QUANTUM;
+
+	/* Wait until it's our turn. */
+	while ((delta = my_ticket - lock->current_ticket) != 0)
+		relax((128 / CYCLES_PER_RELAX_LOOP) * delta);
+}
+EXPORT_SYMBOL(arch_spin_lock);
+
+int arch_spin_trylock(arch_spinlock_t *lock)
+{
+	/*
+	 * Grab a ticket; no need to retry if it's busy, we'll just
+	 * treat that the same as "locked", since someone else
+	 * will lock it momentarily anyway.
+	 */
+	int my_ticket = __insn_tns((void *)&lock->next_ticket);
+
+	if (my_ticket == lock->current_ticket) {
+		/* Not currently locked, so lock it by keeping this ticket. */
+		lock->next_ticket = my_ticket + TICKET_QUANTUM;
+		/* Success! */
+		return 1;
+	}
+
+	if (!(my_ticket & 1)) {
+		/* Release next_ticket. */
+		lock->next_ticket = my_ticket;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(arch_spin_trylock);
+
+void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+	u32 iterations = 0;
+	while (arch_spin_is_locked(lock))
+		delay_backoff(iterations++);
+}
+EXPORT_SYMBOL(arch_spin_unlock_wait);
+
+/*
+ * The low byte is always reserved to be the marker for a "tns" operation
+ * since the low bit is set to "1" by a tns.  The next seven bits are
+ * zeroes.  The next byte holds the "next" writer value, i.e. the ticket
+ * available for the next task that wants to write.  The third byte holds
+ * the current writer value, i.e. the writer who holds the current ticket.
+ * If current == next == 0, there are no interested writers.
+ */
+#define WR_NEXT_SHIFT   _WR_NEXT_SHIFT
+#define WR_CURR_SHIFT   _WR_CURR_SHIFT
+#define WR_WIDTH        _WR_WIDTH
+#define WR_MASK         ((1 << WR_WIDTH) - 1)
+
+/*
+ * The last eight bits hold the active reader count.  This has to be
+ * zero before a writer can start to write.
+ */
+#define RD_COUNT_SHIFT  _RD_COUNT_SHIFT
+#define RD_COUNT_WIDTH  _RD_COUNT_WIDTH
+#define RD_COUNT_MASK   ((1 << RD_COUNT_WIDTH) - 1)
+
+
+/* Lock the word, spinning until there are no tns-ers. */
+static inline u32 get_rwlock(arch_rwlock_t *rwlock)
+{
+	u32 iterations = 0;
+	for (;;) {
+		u32 val = __insn_tns((int *)&rwlock->lock);
+		if (unlikely(val & 1)) {
+			delay_backoff(iterations++);
+			continue;
+		}
+		return val;
+	}
+}
+
+int arch_read_trylock_slow(arch_rwlock_t *rwlock)
+{
+	u32 val = get_rwlock(rwlock);
+	int locked = (val << RD_COUNT_WIDTH) == 0;
+	rwlock->lock = val + (locked << RD_COUNT_SHIFT);
+	return locked;
+}
+EXPORT_SYMBOL(arch_read_trylock_slow);
+
+void arch_read_unlock_slow(arch_rwlock_t *rwlock)
+{
+	u32 val = get_rwlock(rwlock);
+	rwlock->lock = val - (1 << RD_COUNT_SHIFT);
+}
+EXPORT_SYMBOL(arch_read_unlock_slow);
+
+void arch_write_unlock_slow(arch_rwlock_t *rwlock, u32 val)
+{
+	u32 eq, mask = 1 << WR_CURR_SHIFT;
+	while (unlikely(val & 1)) {
+		/* Limited backoff since we are the highest-priority task. */
+		relax(4);
+		val = __insn_tns((int *)&rwlock->lock);
+	}
+	val = __insn_addb(val, mask);
+	eq = __insn_seqb(val, val << (WR_CURR_SHIFT - WR_NEXT_SHIFT));
+	val = __insn_mz(eq & mask, val);
+	rwlock->lock = val;
+}
+EXPORT_SYMBOL(arch_write_unlock_slow);
+
+/*
+ * We spin until everything but the reader bits (which are in the high
+ * part of the word) are zero, i.e. no active or waiting writers, no tns.
+ *
+ * ISSUE: This approach can permanently starve readers.  A reader who sees
+ * a writer could instead take a ticket lock (just like a writer would),
+ * and atomically enter read mode (with 1 reader) when it gets the ticket.
+ * This way both readers and writers will always make forward progress
+ * in a finite time.
+ */
+void arch_read_lock_slow(arch_rwlock_t *rwlock, u32 val)
+{
+	u32 iterations = 0;
+	do {
+		if (!(val & 1))
+			rwlock->lock = val;
+		delay_backoff(iterations++);
+		val = __insn_tns((int *)&rwlock->lock);
+	} while ((val << RD_COUNT_WIDTH) != 0);
+	rwlock->lock = val + (1 << RD_COUNT_SHIFT);
+}
+EXPORT_SYMBOL(arch_read_lock_slow);
+
+void arch_write_lock_slow(arch_rwlock_t *rwlock, u32 val)
+{
+	/*
+	 * The trailing underscore on this variable (and curr_ below)
+	 * reminds us that the high bits are garbage; we mask them out
+	 * when we compare them.
+	 */
+	u32 my_ticket_;
+
+	/* Take out the next ticket; this will also stop would-be readers. */
+	if (val & 1)
+		val = get_rwlock(rwlock);
+	rwlock->lock = __insn_addb(val, 1 << WR_NEXT_SHIFT);
+
+	/* Extract my ticket value from the original word. */
+	my_ticket_ = val >> WR_NEXT_SHIFT;
+
+	/*
+	 * Wait until the "current" field matches our ticket, and
+	 * there are no remaining readers.
+	 */
+	for (;;) {
+		u32 curr_ = val >> WR_CURR_SHIFT;
+		u32 readers = val >> RD_COUNT_SHIFT;
+		u32 delta = ((my_ticket_ - curr_) & WR_MASK) + !!readers;
+		if (likely(delta == 0))
+			break;
+
+		/* Delay based on how many lock-holders are still out there. */
+		relax((256 / CYCLES_PER_RELAX_LOOP) * delta);
+
+		/*
+		 * Get a non-tns value to check; we don't need to tns
+		 * it ourselves.  Since we're not tns'ing, we retry
+		 * more rapidly to get a valid value.
+		 */
+		while ((val = rwlock->lock) & 1)
+			relax(4);
+	}
+}
+EXPORT_SYMBOL(arch_write_lock_slow);
+
+int __tns_atomic_acquire(atomic_t *lock)
+{
+	int ret;
+	u32 iterations = 0;
+
+	BUG_ON(__insn_mfspr(SPR_INTERRUPT_CRITICAL_SECTION));
+	__insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 1);
+
+	while ((ret = __insn_tns((void *)&lock->counter)) == 1)
+		delay_backoff(iterations++);
+	return ret;
+}
+
+void __tns_atomic_release(atomic_t *p, int v)
+{
+	p->counter = v;
+	__insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0);
+}
diff --git a/arch/tile/lib/spinlock_common.h b/arch/tile/lib/spinlock_common.h
new file mode 100644
index 0000000..8dffebd
--- /dev/null
+++ b/arch/tile/lib/spinlock_common.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ * This file is included into spinlock_32.c or _64.c.
+ */
+
+/*
+ * The mfspr in __spinlock_relax() is 5 or 6 cycles plus 2 for loop
+ * overhead.
+ */
+#ifdef __tilegx__
+#define CYCLES_PER_RELAX_LOOP 7
+#else
+#define CYCLES_PER_RELAX_LOOP 8
+#endif
+
+/*
+ * Idle the core for CYCLES_PER_RELAX_LOOP * iterations cycles.
+ */
+static inline void
+relax(int iterations)
+{
+	for (/*above*/; iterations > 0; iterations--)
+		__insn_mfspr(SPR_PASS);
+	barrier();
+}
+
+/* Perform bounded exponential backoff.*/
+void delay_backoff(int iterations)
+{
+	u32 exponent, loops;
+
+	/*
+	 * 2^exponent is how many times we go around the loop,
+	 * which takes 8 cycles.  We want to start with a 16- to 31-cycle
+	 * loop, so we need to go around minimum 2 = 2^1 times, so we
+	 * bias the original value up by 1.
+	 */
+	exponent = iterations + 1;
+
+	/*
+	 * Don't allow exponent to exceed 7, so we have 128 loops,
+	 * or 1,024 (to 2,047) cycles, as our maximum.
+	 */
+	if (exponent > 8)
+		exponent = 8;
+
+	loops = 1 << exponent;
+
+	/* Add a randomness factor so two cpus never get in lock step. */
+	loops += __insn_crc32_32(stack_pointer, get_cycles_low()) &
+		(loops - 1);
+
+	relax(1 << exponent);
+}
diff --git a/arch/tile/lib/strchr_32.c b/arch/tile/lib/strchr_32.c
new file mode 100644
index 0000000..c94e6f7
--- /dev/null
+++ b/arch/tile/lib/strchr_32.c
@@ -0,0 +1,66 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/module.h>
+
+#undef strchr
+
+char *strchr(const char *s, int c)
+{
+	int z, g;
+
+	/* Get an aligned pointer. */
+	const uintptr_t s_int = (uintptr_t) s;
+	const uint32_t *p = (const uint32_t *)(s_int & -4);
+
+	/* Create four copies of the byte for which we are looking. */
+	const uint32_t goal = 0x01010101 * (uint8_t) c;
+
+	/* Read the first aligned word, but force bytes before the string to
+	 * match neither zero nor goal (we make sure the high bit of each
+	 * byte is 1, and the low 7 bits are all the opposite of the goal
+	 * byte).
+	 *
+	 * Note that this shift count expression works because we know shift
+	 * counts are taken mod 32.
+	 */
+	const uint32_t before_mask = (1 << (s_int << 3)) - 1;
+	uint32_t v = (*p | before_mask) ^ (goal & __insn_shrib(before_mask, 1));
+
+	uint32_t zero_matches, goal_matches;
+	while (1) {
+		/* Look for a terminating '\0'. */
+		zero_matches = __insn_seqb(v, 0);
+
+		/* Look for the goal byte. */
+		goal_matches = __insn_seqb(v, goal);
+
+		if (__builtin_expect(zero_matches | goal_matches, 0))
+			break;
+
+		v = *++p;
+	}
+
+	z = __insn_ctz(zero_matches);
+	g = __insn_ctz(goal_matches);
+
+	/* If we found c before '\0' we got a match. Note that if c == '\0'
+	 * then g == z, and we correctly return the address of the '\0'
+	 * rather than NULL.
+	 */
+	return (g <= z) ? ((char *)p) + (g >> 3) : NULL;
+}
+EXPORT_SYMBOL(strchr);
diff --git a/arch/tile/lib/strlen_32.c b/arch/tile/lib/strlen_32.c
new file mode 100644
index 0000000..f26f88e
--- /dev/null
+++ b/arch/tile/lib/strlen_32.c
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/module.h>
+
+size_t strlen(const char *s)
+{
+	/* Get an aligned pointer. */
+	const uintptr_t s_int = (uintptr_t) s;
+	const uint32_t *p = (const uint32_t *)(s_int & -4);
+
+	/* Read the first word, but force bytes before the string to be nonzero.
+	 * This expression works because we know shift counts are taken mod 32.
+	 */
+	uint32_t v = *p | ((1 << (s_int << 3)) - 1);
+
+	uint32_t bits;
+	while ((bits = __insn_seqb(v, 0)) == 0)
+		v = *++p;
+
+	return ((const char *)p) + (__insn_ctz(bits) >> 3) - s;
+}
+EXPORT_SYMBOL(strlen);
diff --git a/arch/tile/lib/uaccess.c b/arch/tile/lib/uaccess.c
new file mode 100644
index 0000000..9ae1825
--- /dev/null
+++ b/arch/tile/lib/uaccess.c
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#include <linux/uaccess.h>
+#include <linux/module.h>
+
+int __range_ok(unsigned long addr, unsigned long size)
+{
+	unsigned long limit = current_thread_info()->addr_limit.seg;
+	__chk_user_ptr(addr);
+	return !((addr < limit && size <= limit - addr) ||
+		 is_arch_mappable_range(addr, size));
+}
+EXPORT_SYMBOL(__range_ok);
+
+void copy_from_user_overflow(void)
+{
+       WARN(1, "Buffer overflow detected!\n");
+}
+EXPORT_SYMBOL(copy_from_user_overflow);
diff --git a/arch/tile/lib/usercopy_32.S b/arch/tile/lib/usercopy_32.S
new file mode 100644
index 0000000..979f76d
--- /dev/null
+++ b/arch/tile/lib/usercopy_32.S
@@ -0,0 +1,223 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#include <linux/linkage.h>
+#include <asm/errno.h>
+#include <asm/cache.h>
+#include <arch/chip.h>
+
+/* Access user memory, but use MMU to avoid propagating kernel exceptions. */
+
+	.pushsection .fixup,"ax"
+
+get_user_fault:
+	{ move r0, zero; move r1, zero }
+	{ movei r2, -EFAULT; jrp lr }
+	ENDPROC(get_user_fault)
+
+put_user_fault:
+	{ movei r0, -EFAULT; jrp lr }
+	ENDPROC(put_user_fault)
+
+	.popsection
+
+/*
+ * __get_user_N functions take a pointer in r0, and return 0 in r2
+ * on success, with the value in r0; or else -EFAULT in r2.
+ */
+#define __get_user_N(bytes, LOAD) \
+	STD_ENTRY(__get_user_##bytes); \
+1:	{ LOAD r0, r0; move r1, zero; move r2, zero }; \
+	jrp lr; \
+	STD_ENDPROC(__get_user_##bytes); \
+	.pushsection __ex_table,"a"; \
+	.word 1b, get_user_fault; \
+	.popsection
+
+__get_user_N(1, lb_u)
+__get_user_N(2, lh_u)
+__get_user_N(4, lw)
+
+/*
+ * __get_user_8 takes a pointer in r0, and returns 0 in r2
+ * on success, with the value in r0/r1; or else -EFAULT in r2.
+ */
+	STD_ENTRY(__get_user_8);
+1:	{ lw r0, r0; addi r1, r0, 4 };
+2:	{ lw r1, r1; move r2, zero };
+	jrp lr;
+	STD_ENDPROC(__get_user_8);
+	.pushsection __ex_table,"a";
+	.word 1b, get_user_fault;
+	.word 2b, get_user_fault;
+	.popsection
+
+/*
+ * __put_user_N functions take a value in r0 and a pointer in r1,
+ * and return 0 in r0 on success or -EFAULT on failure.
+ */
+#define __put_user_N(bytes, STORE) \
+	STD_ENTRY(__put_user_##bytes); \
+1:	{ STORE r1, r0; move r0, zero }; \
+	jrp lr; \
+	STD_ENDPROC(__put_user_##bytes); \
+	.pushsection __ex_table,"a"; \
+	.word 1b, put_user_fault; \
+	.popsection
+
+__put_user_N(1, sb)
+__put_user_N(2, sh)
+__put_user_N(4, sw)
+
+/*
+ * __put_user_8 takes a value in r0/r1 and a pointer in r2,
+ * and returns 0 in r0 on success or -EFAULT on failure.
+ */
+STD_ENTRY(__put_user_8)
+1:      { sw r2, r0; addi r2, r2, 4 }
+2:      { sw r2, r1; move r0, zero }
+	jrp lr
+	STD_ENDPROC(__put_user_8)
+	.pushsection __ex_table,"a"
+	.word 1b, put_user_fault
+	.word 2b, put_user_fault
+	.popsection
+
+
+/*
+ * strnlen_user_asm takes the pointer in r0, and the length bound in r1.
+ * It returns the length, including the terminating NUL, or zero on exception.
+ * If length is greater than the bound, returns one plus the bound.
+ */
+STD_ENTRY(strnlen_user_asm)
+	{ bz r1, 2f; addi r3, r0, -1 }  /* bias down to include NUL */
+1:      { lb_u r4, r0; addi r1, r1, -1 }
+	bz r4, 2f
+	{ bnzt r1, 1b; addi r0, r0, 1 }
+2:      { sub r0, r0, r3; jrp lr }
+	STD_ENDPROC(strnlen_user_asm)
+	.pushsection .fixup,"ax"
+strnlen_user_fault:
+	{ move r0, zero; jrp lr }
+	ENDPROC(strnlen_user_fault)
+	.section __ex_table,"a"
+	.word 1b, strnlen_user_fault
+	.popsection
+
+/*
+ * strncpy_from_user_asm takes the kernel target pointer in r0,
+ * the userspace source pointer in r1, and the length bound (including
+ * the trailing NUL) in r2.  On success, it returns the string length
+ * (not including the trailing NUL), or -EFAULT on failure.
+ */
+STD_ENTRY(strncpy_from_user_asm)
+	{ bz r2, 2f; move r3, r0 }
+1:      { lb_u r4, r1; addi r1, r1, 1; addi r2, r2, -1 }
+	{ sb r0, r4; addi r0, r0, 1 }
+	bz r2, 2f
+	bnzt r4, 1b
+	addi r0, r0, -1   /* don't count the trailing NUL */
+2:      { sub r0, r0, r3; jrp lr }
+	STD_ENDPROC(strncpy_from_user_asm)
+	.pushsection .fixup,"ax"
+strncpy_from_user_fault:
+	{ movei r0, -EFAULT; jrp lr }
+	ENDPROC(strncpy_from_user_fault)
+	.section __ex_table,"a"
+	.word 1b, strncpy_from_user_fault
+	.popsection
+
+/*
+ * clear_user_asm takes the user target address in r0 and the
+ * number of bytes to zero in r1.
+ * It returns the number of uncopiable bytes (hopefully zero) in r0.
+ * Note that we don't use a separate .fixup section here since we fall
+ * through into the "fixup" code as the last straight-line bundle anyway.
+ */
+STD_ENTRY(clear_user_asm)
+	{ bz r1, 2f; or r2, r0, r1 }
+	andi r2, r2, 3
+	bzt r2, .Lclear_aligned_user_asm
+1:      { sb r0, zero; addi r0, r0, 1; addi r1, r1, -1 }
+	bnzt r1, 1b
+2:      { move r0, r1; jrp lr }
+	.pushsection __ex_table,"a"
+	.word 1b, 2b
+	.popsection
+
+.Lclear_aligned_user_asm:
+1:      { sw r0, zero; addi r0, r0, 4; addi r1, r1, -4 }
+	bnzt r1, 1b
+2:      { move r0, r1; jrp lr }
+	STD_ENDPROC(clear_user_asm)
+	.pushsection __ex_table,"a"
+	.word 1b, 2b
+	.popsection
+
+/*
+ * flush_user_asm takes the user target address in r0 and the
+ * number of bytes to flush in r1.
+ * It returns the number of unflushable bytes (hopefully zero) in r0.
+ */
+STD_ENTRY(flush_user_asm)
+	bz r1, 2f
+	{ movei r2, L2_CACHE_BYTES; add r1, r0, r1 }
+	{ sub r2, zero, r2; addi r1, r1, L2_CACHE_BYTES-1 }
+	{ and r0, r0, r2; and r1, r1, r2 }
+	{ sub r1, r1, r0 }
+1:      { flush r0; addi r1, r1, -CHIP_FLUSH_STRIDE() }
+	{ addi r0, r0, CHIP_FLUSH_STRIDE(); bnzt r1, 1b }
+2:      { move r0, r1; jrp lr }
+	STD_ENDPROC(flush_user_asm)
+	.pushsection __ex_table,"a"
+	.word 1b, 2b
+	.popsection
+
+/*
+ * inv_user_asm takes the user target address in r0 and the
+ * number of bytes to invalidate in r1.
+ * It returns the number of not inv'able bytes (hopefully zero) in r0.
+ */
+STD_ENTRY(inv_user_asm)
+	bz r1, 2f
+	{ movei r2, L2_CACHE_BYTES; add r1, r0, r1 }
+	{ sub r2, zero, r2; addi r1, r1, L2_CACHE_BYTES-1 }
+	{ and r0, r0, r2; and r1, r1, r2 }
+	{ sub r1, r1, r0 }
+1:      { inv r0; addi r1, r1, -CHIP_INV_STRIDE() }
+	{ addi r0, r0, CHIP_INV_STRIDE(); bnzt r1, 1b }
+2:      { move r0, r1; jrp lr }
+	STD_ENDPROC(inv_user_asm)
+	.pushsection __ex_table,"a"
+	.word 1b, 2b
+	.popsection
+
+/*
+ * finv_user_asm takes the user target address in r0 and the
+ * number of bytes to flush-invalidate in r1.
+ * It returns the number of not finv'able bytes (hopefully zero) in r0.
+ */
+STD_ENTRY(finv_user_asm)
+	bz r1, 2f
+	{ movei r2, L2_CACHE_BYTES; add r1, r0, r1 }
+	{ sub r2, zero, r2; addi r1, r1, L2_CACHE_BYTES-1 }
+	{ and r0, r0, r2; and r1, r1, r2 }
+	{ sub r1, r1, r0 }
+1:      { finv r0; addi r1, r1, -CHIP_FINV_STRIDE() }
+	{ addi r0, r0, CHIP_FINV_STRIDE(); bnzt r1, 1b }
+2:      { move r0, r1; jrp lr }
+	STD_ENDPROC(finv_user_asm)
+	.pushsection __ex_table,"a"
+	.word 1b, 2b
+	.popsection
diff --git a/arch/tile/mm/Makefile b/arch/tile/mm/Makefile
new file mode 100644
index 0000000..e252aed
--- /dev/null
+++ b/arch/tile/mm/Makefile
@@ -0,0 +1,9 @@
+#
+# Makefile for the linux tile-specific parts of the memory manager.
+#
+
+obj-y	:= init.o pgtable.o fault.o extable.o elf.o \
+	   mmap.o homecache.o migrate_$(BITS).o
+
+obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
+obj-$(CONFIG_HIGHMEM) += highmem.o
diff --git a/arch/tile/mm/elf.c b/arch/tile/mm/elf.c
new file mode 100644
index 0000000..818c9be
--- /dev/null
+++ b/arch/tile/mm/elf.c
@@ -0,0 +1,164 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/binfmts.h>
+#include <linux/compat.h>
+#include <linux/mman.h>
+#include <linux/elf.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+
+/* Notify a running simulator, if any, that an exec just occurred. */
+static void sim_notify_exec(const char *binary_name)
+{
+	unsigned char c;
+	do {
+		c = *binary_name++;
+		__insn_mtspr(SPR_SIM_CONTROL,
+			     (SIM_CONTROL_OS_EXEC
+			      | (c << _SIM_CONTROL_OPERATOR_BITS)));
+
+	} while (c);
+}
+
+static int notify_exec(void)
+{
+	int retval = 0;  /* failure */
+	struct vm_area_struct *vma = current->mm->mmap;
+	while (vma) {
+		if ((vma->vm_flags & VM_EXECUTABLE) && vma->vm_file)
+			break;
+		vma = vma->vm_next;
+	}
+	if (vma) {
+		char *buf = (char *) __get_free_page(GFP_KERNEL);
+		if (buf) {
+			char *path = d_path(&vma->vm_file->f_path,
+					    buf, PAGE_SIZE);
+			if (!IS_ERR(path)) {
+				sim_notify_exec(path);
+				retval = 1;
+			}
+			free_page((unsigned long)buf);
+		}
+	}
+	return retval;
+}
+
+/* Notify a running simulator, if any, that we loaded an interpreter. */
+static void sim_notify_interp(unsigned long load_addr)
+{
+	size_t i;
+	for (i = 0; i < sizeof(load_addr); i++) {
+		unsigned char c = load_addr >> (i * 8);
+		__insn_mtspr(SPR_SIM_CONTROL,
+			     (SIM_CONTROL_OS_INTERP
+			      | (c << _SIM_CONTROL_OPERATOR_BITS)));
+	}
+}
+
+
+/* Kernel address of page used to map read-only kernel data into userspace. */
+static void *vdso_page;
+
+/* One-entry array used for install_special_mapping. */
+static struct page *vdso_pages[1];
+
+int __init vdso_setup(void)
+{
+	extern char __rt_sigreturn[], __rt_sigreturn_end[];
+	vdso_page = (void *)get_zeroed_page(GFP_ATOMIC);
+	memcpy(vdso_page, __rt_sigreturn, __rt_sigreturn_end - __rt_sigreturn);
+	vdso_pages[0] = virt_to_page(vdso_page);
+	return 0;
+}
+device_initcall(vdso_setup);
+
+const char *arch_vma_name(struct vm_area_struct *vma)
+{
+	if (vma->vm_private_data == vdso_pages)
+		return "[vdso]";
+#ifndef __tilegx__
+	if (vma->vm_start == MEM_USER_INTRPT)
+		return "[intrpt]";
+#endif
+	return NULL;
+}
+
+int arch_setup_additional_pages(struct linux_binprm *bprm,
+				int executable_stack)
+{
+	struct mm_struct *mm = current->mm;
+	unsigned long vdso_base;
+	int retval = 0;
+
+	/*
+	 * Notify the simulator that an exec just occurred.
+	 * If we can't find the filename of the mapping, just use
+	 * whatever was passed as the linux_binprm filename.
+	 */
+	if (!notify_exec())
+		sim_notify_exec(bprm->filename);
+
+	down_write(&mm->mmap_sem);
+
+	/*
+	 * MAYWRITE to allow gdb to COW and set breakpoints
+	 *
+	 * Make sure the vDSO gets into every core dump.  Dumping its
+	 * contents makes post-mortem fully interpretable later
+	 * without matching up the same kernel and hardware config to
+	 * see what PC values meant.
+	 */
+	vdso_base = VDSO_BASE;
+	retval = install_special_mapping(mm, vdso_base, PAGE_SIZE,
+					 VM_READ|VM_EXEC|
+					 VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
+					 VM_ALWAYSDUMP,
+					 vdso_pages);
+
+#ifndef __tilegx__
+	/*
+	 * Set up a user-interrupt mapping here; the user can't
+	 * create one themselves since it is above TASK_SIZE.
+	 * We make it unwritable by default, so the model for adding
+	 * interrupt vectors always involves an mprotect.
+	 */
+	if (!retval) {
+		unsigned long addr = MEM_USER_INTRPT;
+		addr = mmap_region(NULL, addr, INTRPT_SIZE,
+				   MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE,
+				   VM_READ|VM_EXEC|
+				   VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, 0);
+		if (addr > (unsigned long) -PAGE_SIZE)
+			retval = (int) addr;
+	}
+#endif
+
+	up_write(&mm->mmap_sem);
+
+	return retval;
+}
+
+
+void elf_plat_init(struct pt_regs *regs, unsigned long load_addr)
+{
+	/* Zero all registers. */
+	memset(regs, 0, sizeof(*regs));
+
+	/* Report the interpreter's load address. */
+	sim_notify_interp(load_addr);
+}
diff --git a/arch/tile/mm/extable.c b/arch/tile/mm/extable.c
new file mode 100644
index 0000000..4fb0acb
--- /dev/null
+++ b/arch/tile/mm/extable.c
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/uaccess.h>
+
+int fixup_exception(struct pt_regs *regs)
+{
+	const struct exception_table_entry *fixup;
+
+	fixup = search_exception_tables(regs->pc);
+	if (fixup) {
+		regs->pc = fixup->fixup;
+		return 1;
+	}
+
+	return 0;
+}
diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c
new file mode 100644
index 0000000..9b6b92f
--- /dev/null
+++ b/arch/tile/mm/fault.c
@@ -0,0 +1,905 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ * From i386 code copyright (C) 1995  Linus Torvalds
+ */
+
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/tty.h>
+#include <linux/vt_kern.h>		/* For unblank_screen() */
+#include <linux/highmem.h>
+#include <linux/module.h>
+#include <linux/kprobes.h>
+#include <linux/hugetlb.h>
+#include <linux/syscalls.h>
+#include <linux/uaccess.h>
+
+#include <asm/system.h>
+#include <asm/pgalloc.h>
+#include <asm/sections.h>
+
+#include <arch/interrupts.h>
+
+/*
+ * Unlock any spinlocks which will prevent us from getting the
+ * message out
+ */
+void bust_spinlocks(int yes)
+{
+	int loglevel_save = console_loglevel;
+
+	if (yes) {
+		oops_in_progress = 1;
+		return;
+	}
+	oops_in_progress = 0;
+	/*
+	 * OK, the message is on the console.  Now we call printk()
+	 * without oops_in_progress set so that printk will give klogd
+	 * a poke.  Hold onto your hats...
+	 */
+	console_loglevel = 15;	/* NMI oopser may have shut the console up */
+	printk(" ");
+	console_loglevel = loglevel_save;
+}
+
+static noinline void force_sig_info_fault(int si_signo, int si_code,
+	unsigned long address, int fault_num, struct task_struct *tsk)
+{
+	siginfo_t info;
+
+	if (unlikely(tsk->pid < 2)) {
+		panic("Signal %d (code %d) at %#lx sent to %s!",
+		      si_signo, si_code & 0xffff, address,
+		      tsk->pid ? "init" : "the idle task");
+	}
+
+	info.si_signo = si_signo;
+	info.si_errno = 0;
+	info.si_code = si_code;
+	info.si_addr = (void __user *)address;
+	info.si_trapno = fault_num;
+	force_sig_info(si_signo, &info, tsk);
+}
+
+#ifndef __tilegx__
+/*
+ * Synthesize the fault a PL0 process would get by doing a word-load of
+ * an unaligned address or a high kernel address.  Called indirectly
+ * from sys_cmpxchg() in kernel/intvec.S.
+ */
+int _sys_cmpxchg_badaddr(unsigned long address, struct pt_regs *regs)
+{
+	if (address >= PAGE_OFFSET)
+		force_sig_info_fault(SIGSEGV, SEGV_MAPERR, address,
+				     INT_DTLB_MISS, current);
+	else
+		force_sig_info_fault(SIGBUS, BUS_ADRALN, address,
+				     INT_UNALIGN_DATA, current);
+
+	/*
+	 * Adjust pc to point at the actual instruction, which is unusual
+	 * for syscalls normally, but is appropriate when we are claiming
+	 * that a syscall swint1 caused a page fault or bus error.
+	 */
+	regs->pc -= 8;
+
+	/*
+	 * Mark this as a caller-save interrupt, like a normal page fault,
+	 * so that when we go through the signal handler path we will
+	 * properly restore r0, r1, and r2 for the signal handler arguments.
+	 */
+	regs->flags |= PT_FLAGS_CALLER_SAVES;
+
+	return 0;
+}
+#endif
+
+static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
+{
+	unsigned index = pgd_index(address);
+	pgd_t *pgd_k;
+	pud_t *pud, *pud_k;
+	pmd_t *pmd, *pmd_k;
+
+	pgd += index;
+	pgd_k = init_mm.pgd + index;
+
+	if (!pgd_present(*pgd_k))
+		return NULL;
+
+	pud = pud_offset(pgd, address);
+	pud_k = pud_offset(pgd_k, address);
+	if (!pud_present(*pud_k))
+		return NULL;
+
+	pmd = pmd_offset(pud, address);
+	pmd_k = pmd_offset(pud_k, address);
+	if (!pmd_present(*pmd_k))
+		return NULL;
+	if (!pmd_present(*pmd)) {
+		set_pmd(pmd, *pmd_k);
+		arch_flush_lazy_mmu_mode();
+	} else
+		BUG_ON(pmd_ptfn(*pmd) != pmd_ptfn(*pmd_k));
+	return pmd_k;
+}
+
+/*
+ * Handle a fault on the vmalloc or module mapping area
+ */
+static inline int vmalloc_fault(pgd_t *pgd, unsigned long address)
+{
+	pmd_t *pmd_k;
+	pte_t *pte_k;
+
+	/* Make sure we are in vmalloc area */
+	if (!(address >= VMALLOC_START && address < VMALLOC_END))
+		return -1;
+
+	/*
+	 * Synchronize this task's top level page-table
+	 * with the 'reference' page table.
+	 */
+	pmd_k = vmalloc_sync_one(pgd, address);
+	if (!pmd_k)
+		return -1;
+	if (pmd_huge(*pmd_k))
+		return 0;   /* support TILE huge_vmap() API */
+	pte_k = pte_offset_kernel(pmd_k, address);
+	if (!pte_present(*pte_k))
+		return -1;
+	return 0;
+}
+
+/* Wait until this PTE has completed migration. */
+static void wait_for_migration(pte_t *pte)
+{
+	if (pte_migrating(*pte)) {
+		/*
+		 * Wait until the migrater fixes up this pte.
+		 * We scale the loop count by the clock rate so we'll wait for
+		 * a few seconds here.
+		 */
+		int retries = 0;
+		int bound = get_clock_rate();
+		while (pte_migrating(*pte)) {
+			barrier();
+			if (++retries > bound)
+				panic("Hit migrating PTE (%#llx) and"
+				      " page PFN %#lx still migrating",
+				      pte->val, pte_pfn(*pte));
+		}
+	}
+}
+
+/*
+ * It's not generally safe to use "current" to get the page table pointer,
+ * since we might be running an oprofile interrupt in the middle of a
+ * task switch.
+ */
+static pgd_t *get_current_pgd(void)
+{
+	HV_Context ctx = hv_inquire_context();
+	unsigned long pgd_pfn = ctx.page_table >> PAGE_SHIFT;
+	struct page *pgd_page = pfn_to_page(pgd_pfn);
+	BUG_ON(PageHighMem(pgd_page));   /* oops, HIGHPTE? */
+	return (pgd_t *) __va(ctx.page_table);
+}
+
+/*
+ * We can receive a page fault from a migrating PTE at any time.
+ * Handle it by just waiting until the fault resolves.
+ *
+ * It's also possible to get a migrating kernel PTE that resolves
+ * itself during the downcall from hypervisor to Linux.  We just check
+ * here to see if the PTE seems valid, and if so we retry it.
+ *
+ * NOTE! We MUST NOT take any locks for this case.  We may be in an
+ * interrupt or a critical region, and must do as little as possible.
+ * Similarly, we can't use atomic ops here, since we may be handling a
+ * fault caused by an atomic op access.
+ */
+static int handle_migrating_pte(pgd_t *pgd, int fault_num,
+				unsigned long address,
+				int is_kernel_mode, int write)
+{
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+	pte_t pteval;
+
+	if (pgd_addr_invalid(address))
+		return 0;
+
+	pgd += pgd_index(address);
+	pud = pud_offset(pgd, address);
+	if (!pud || !pud_present(*pud))
+		return 0;
+	pmd = pmd_offset(pud, address);
+	if (!pmd || !pmd_present(*pmd))
+		return 0;
+	pte = pmd_huge_page(*pmd) ? ((pte_t *)pmd) :
+		pte_offset_kernel(pmd, address);
+	pteval = *pte;
+	if (pte_migrating(pteval)) {
+		wait_for_migration(pte);
+		return 1;
+	}
+
+	if (!is_kernel_mode || !pte_present(pteval))
+		return 0;
+	if (fault_num == INT_ITLB_MISS) {
+		if (pte_exec(pteval))
+			return 1;
+	} else if (write) {
+		if (pte_write(pteval))
+			return 1;
+	} else {
+		if (pte_read(pteval))
+			return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * This routine is responsible for faulting in user pages.
+ * It passes the work off to one of the appropriate routines.
+ * It returns true if the fault was successfully handled.
+ */
+static int handle_page_fault(struct pt_regs *regs,
+			     int fault_num,
+			     int is_page_fault,
+			     unsigned long address,
+			     int write)
+{
+	struct task_struct *tsk;
+	struct mm_struct *mm;
+	struct vm_area_struct *vma;
+	unsigned long stack_offset;
+	int fault;
+	int si_code;
+	int is_kernel_mode;
+	pgd_t *pgd;
+
+	/* on TILE, protection faults are always writes */
+	if (!is_page_fault)
+		write = 1;
+
+	is_kernel_mode = (EX1_PL(regs->ex1) != USER_PL);
+
+	tsk = validate_current();
+
+	/*
+	 * Check to see if we might be overwriting the stack, and bail
+	 * out if so.  The page fault code is a relatively likely
+	 * place to get trapped in an infinite regress, and once we
+	 * overwrite the whole stack, it becomes very hard to recover.
+	 */
+	stack_offset = stack_pointer & (THREAD_SIZE-1);
+	if (stack_offset < THREAD_SIZE / 8) {
+		printk(KERN_ALERT "Potential stack overrun: sp %#lx\n",
+		       stack_pointer);
+		show_regs(regs);
+		printk(KERN_ALERT "Killing current process %d/%s\n",
+		       tsk->pid, tsk->comm);
+		do_group_exit(SIGKILL);
+	}
+
+	/*
+	 * Early on, we need to check for migrating PTE entries;
+	 * see homecache.c.  If we find a migrating PTE, we wait until
+	 * the backing page claims to be done migrating, then we procede.
+	 * For kernel PTEs, we rewrite the PTE and return and retry.
+	 * Otherwise, we treat the fault like a normal "no PTE" fault,
+	 * rather than trying to patch up the existing PTE.
+	 */
+	pgd = get_current_pgd();
+	if (handle_migrating_pte(pgd, fault_num, address,
+				 is_kernel_mode, write))
+		return 1;
+
+	si_code = SEGV_MAPERR;
+
+	/*
+	 * We fault-in kernel-space virtual memory on-demand. The
+	 * 'reference' page table is init_mm.pgd.
+	 *
+	 * NOTE! We MUST NOT take any locks for this case. We may
+	 * be in an interrupt or a critical region, and should
+	 * only copy the information from the master page table,
+	 * nothing more.
+	 *
+	 * This verifies that the fault happens in kernel space
+	 * and that the fault was not a protection fault.
+	 */
+	if (unlikely(address >= TASK_SIZE &&
+		     !is_arch_mappable_range(address, 0))) {
+		if (is_kernel_mode && is_page_fault &&
+		    vmalloc_fault(pgd, address) >= 0)
+			return 1;
+		/*
+		 * Don't take the mm semaphore here. If we fixup a prefetch
+		 * fault we could otherwise deadlock.
+		 */
+		mm = NULL;  /* happy compiler */
+		vma = NULL;
+		goto bad_area_nosemaphore;
+	}
+
+	/*
+	 * If we're trying to touch user-space addresses, we must
+	 * be either at PL0, or else with interrupts enabled in the
+	 * kernel, so either way we can re-enable interrupts here.
+	 */
+	local_irq_enable();
+
+	mm = tsk->mm;
+
+	/*
+	 * If we're in an interrupt, have no user context or are running in an
+	 * atomic region then we must not take the fault.
+	 */
+	if (in_atomic() || !mm) {
+		vma = NULL;  /* happy compiler */
+		goto bad_area_nosemaphore;
+	}
+
+	/*
+	 * When running in the kernel we expect faults to occur only to
+	 * addresses in user space.  All other faults represent errors in the
+	 * kernel and should generate an OOPS.  Unfortunately, in the case of an
+	 * erroneous fault occurring in a code path which already holds mmap_sem
+	 * we will deadlock attempting to validate the fault against the
+	 * address space.  Luckily the kernel only validly references user
+	 * space from well defined areas of code, which are listed in the
+	 * exceptions table.
+	 *
+	 * As the vast majority of faults will be valid we will only perform
+	 * the source reference check when there is a possibility of a deadlock.
+	 * Attempt to lock the address space, if we cannot we then validate the
+	 * source.  If this is invalid we can skip the address space check,
+	 * thus avoiding the deadlock.
+	 */
+	if (!down_read_trylock(&mm->mmap_sem)) {
+		if (is_kernel_mode &&
+		    !search_exception_tables(regs->pc)) {
+			vma = NULL;  /* happy compiler */
+			goto bad_area_nosemaphore;
+		}
+		down_read(&mm->mmap_sem);
+	}
+
+	vma = find_vma(mm, address);
+	if (!vma)
+		goto bad_area;
+	if (vma->vm_start <= address)
+		goto good_area;
+	if (!(vma->vm_flags & VM_GROWSDOWN))
+		goto bad_area;
+	if (regs->sp < PAGE_OFFSET) {
+		/*
+		 * accessing the stack below sp is always a bug.
+		 */
+		if (address < regs->sp)
+			goto bad_area;
+	}
+	if (expand_stack(vma, address))
+		goto bad_area;
+
+/*
+ * Ok, we have a good vm_area for this memory access, so
+ * we can handle it..
+ */
+good_area:
+	si_code = SEGV_ACCERR;
+	if (fault_num == INT_ITLB_MISS) {
+		if (!(vma->vm_flags & VM_EXEC))
+			goto bad_area;
+	} else if (write) {
+#ifdef TEST_VERIFY_AREA
+		if (!is_page_fault && regs->cs == KERNEL_CS)
+			printk("WP fault at "REGFMT"\n", regs->eip);
+#endif
+		if (!(vma->vm_flags & VM_WRITE))
+			goto bad_area;
+	} else {
+		if (!is_page_fault || !(vma->vm_flags & VM_READ))
+			goto bad_area;
+	}
+
+ survive:
+	/*
+	 * If for any reason at all we couldn't handle the fault,
+	 * make sure we exit gracefully rather than endlessly redo
+	 * the fault.
+	 */
+	fault = handle_mm_fault(mm, vma, address, write);
+	if (unlikely(fault & VM_FAULT_ERROR)) {
+		if (fault & VM_FAULT_OOM)
+			goto out_of_memory;
+		else if (fault & VM_FAULT_SIGBUS)
+			goto do_sigbus;
+		BUG();
+	}
+	if (fault & VM_FAULT_MAJOR)
+		tsk->maj_flt++;
+	else
+		tsk->min_flt++;
+
+	/*
+	 * If this was an asynchronous fault,
+	 * restart the appropriate engine.
+	 */
+	switch (fault_num) {
+#if CHIP_HAS_TILE_DMA()
+	case INT_DMATLB_MISS:
+	case INT_DMATLB_MISS_DWNCL:
+	case INT_DMATLB_ACCESS:
+	case INT_DMATLB_ACCESS_DWNCL:
+		__insn_mtspr(SPR_DMA_CTR, SPR_DMA_CTR__REQUEST_MASK);
+		break;
+#endif
+#if CHIP_HAS_SN_PROC()
+	case INT_SNITLB_MISS:
+	case INT_SNITLB_MISS_DWNCL:
+		__insn_mtspr(SPR_SNCTL,
+			     __insn_mfspr(SPR_SNCTL) &
+			     ~SPR_SNCTL__FRZPROC_MASK);
+		break;
+#endif
+	}
+
+	up_read(&mm->mmap_sem);
+	return 1;
+
+/*
+ * Something tried to access memory that isn't in our memory map..
+ * Fix it, but check if it's kernel or user first..
+ */
+bad_area:
+	up_read(&mm->mmap_sem);
+
+bad_area_nosemaphore:
+	/* User mode accesses just cause a SIGSEGV */
+	if (!is_kernel_mode) {
+		/*
+		 * It's possible to have interrupts off here.
+		 */
+		local_irq_enable();
+
+		force_sig_info_fault(SIGSEGV, si_code, address,
+				     fault_num, tsk);
+		return 0;
+	}
+
+no_context:
+	/* Are we prepared to handle this kernel fault?  */
+	if (fixup_exception(regs))
+		return 0;
+
+/*
+ * Oops. The kernel tried to access some bad page. We'll have to
+ * terminate things with extreme prejudice.
+ */
+
+	bust_spinlocks(1);
+
+	/* FIXME: no lookup_address() yet */
+#ifdef SUPPORT_LOOKUP_ADDRESS
+	if (fault_num == INT_ITLB_MISS) {
+		pte_t *pte = lookup_address(address);
+
+		if (pte && pte_present(*pte) && !pte_exec_kernel(*pte))
+			printk(KERN_CRIT "kernel tried to execute"
+			       " non-executable page - exploit attempt?"
+			       " (uid: %d)\n", current->uid);
+	}
+#endif
+	if (address < PAGE_SIZE)
+		printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference\n");
+	else
+		printk(KERN_ALERT "Unable to handle kernel paging request\n");
+	printk(" at virtual address "REGFMT", pc "REGFMT"\n",
+	       address, regs->pc);
+
+	show_regs(regs);
+
+	if (unlikely(tsk->pid < 2)) {
+		panic("Kernel page fault running %s!",
+		      tsk->pid ? "init" : "the idle task");
+	}
+
+	/*
+	 * More FIXME: we should probably copy the i386 here and
+	 * implement a generic die() routine.  Not today.
+	 */
+#ifdef SUPPORT_DIE
+	die("Oops", regs);
+#endif
+	bust_spinlocks(1);
+
+	do_group_exit(SIGKILL);
+
+/*
+ * We ran out of memory, or some other thing happened to us that made
+ * us unable to handle the page fault gracefully.
+ */
+out_of_memory:
+	up_read(&mm->mmap_sem);
+	if (is_global_init(tsk)) {
+		yield();
+		down_read(&mm->mmap_sem);
+		goto survive;
+	}
+	printk("VM: killing process %s\n", tsk->comm);
+	if (!is_kernel_mode)
+		do_group_exit(SIGKILL);
+	goto no_context;
+
+do_sigbus:
+	up_read(&mm->mmap_sem);
+
+	/* Kernel mode? Handle exceptions or die */
+	if (is_kernel_mode)
+		goto no_context;
+
+	force_sig_info_fault(SIGBUS, BUS_ADRERR, address, fault_num, tsk);
+	return 0;
+}
+
+#ifndef __tilegx__
+
+extern char sys_cmpxchg[], __sys_cmpxchg_end[];
+extern char __sys_cmpxchg_grab_lock[];
+extern char __start_atomic_asm_code[], __end_atomic_asm_code[];
+
+/*
+ * We return this structure in registers to avoid having to write
+ * additional save/restore code in the intvec.S caller.
+ */
+struct intvec_state {
+	void *handler;
+	unsigned long vecnum;
+	unsigned long fault_num;
+	unsigned long info;
+	unsigned long retval;
+};
+
+/* We must release ICS before panicking or we won't get anywhere. */
+#define ics_panic(fmt, ...) do { \
+	__insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0); \
+	panic(fmt, __VA_ARGS__); \
+} while (0)
+
+void do_page_fault(struct pt_regs *regs, int fault_num,
+		   unsigned long address, unsigned long write);
+
+/*
+ * When we take an ITLB or DTLB fault or access violation in the
+ * supervisor while the critical section bit is set, the hypervisor is
+ * reluctant to write new values into the EX_CONTEXT_1_x registers,
+ * since that might indicate we have not yet squirreled the SPR
+ * contents away and can thus safely take a recursive interrupt.
+ * Accordingly, the hypervisor passes us the PC via SYSTEM_SAVE_1_2.
+ */
+struct intvec_state do_page_fault_ics(struct pt_regs *regs, int fault_num,
+				      unsigned long address,
+				      unsigned long info)
+{
+	unsigned long pc = info & ~1;
+	int write = info & 1;
+	pgd_t *pgd = get_current_pgd();
+
+	/* Retval is 1 at first since we will handle the fault fully. */
+	struct intvec_state state = {
+		do_page_fault, fault_num, address, write, 1
+	};
+
+	/* Validate that we are plausibly in the right routine. */
+	if ((pc & 0x7) != 0 || pc < PAGE_OFFSET ||
+	    (fault_num != INT_DTLB_MISS &&
+	     fault_num != INT_DTLB_ACCESS)) {
+		unsigned long old_pc = regs->pc;
+		regs->pc = pc;
+		ics_panic("Bad ICS page fault args:"
+			  " old PC %#lx, fault %d/%d at %#lx\n",
+			  old_pc, fault_num, write, address);
+	}
+
+	/* We might be faulting on a vmalloc page, so check that first. */
+	if (fault_num != INT_DTLB_ACCESS && vmalloc_fault(pgd, address) >= 0)
+		return state;
+
+	/*
+	 * If we faulted with ICS set in sys_cmpxchg, we are providing
+	 * a user syscall service that should generate a signal on
+	 * fault.  We didn't set up a kernel stack on initial entry to
+	 * sys_cmpxchg, but instead had one set up by the fault, which
+	 * (because sys_cmpxchg never releases ICS) came to us via the
+	 * SYSTEM_SAVE_1_2 mechanism, and thus EX_CONTEXT_1_[01] are
+	 * still referencing the original user code.  We release the
+	 * atomic lock and rewrite pt_regs so that it appears that we
+	 * came from user-space directly, and after we finish the
+	 * fault we'll go back to user space and re-issue the swint.
+	 * This way the backtrace information is correct if we need to
+	 * emit a stack dump at any point while handling this.
+	 *
+	 * Must match register use in sys_cmpxchg().
+	 */
+	if (pc >= (unsigned long) sys_cmpxchg &&
+	    pc < (unsigned long) __sys_cmpxchg_end) {
+#ifdef CONFIG_SMP
+		/* Don't unlock before we could have locked. */
+		if (pc >= (unsigned long)__sys_cmpxchg_grab_lock) {
+			int *lock_ptr = (int *)(regs->regs[ATOMIC_LOCK_REG]);
+			__atomic_fault_unlock(lock_ptr);
+		}
+#endif
+		regs->sp = regs->regs[27];
+	}
+
+	/*
+	 * We can also fault in the atomic assembly, in which
+	 * case we use the exception table to do the first-level fixup.
+	 * We may re-fixup again in the real fault handler if it
+	 * turns out the faulting address is just bad, and not,
+	 * for example, migrating.
+	 */
+	else if (pc >= (unsigned long) __start_atomic_asm_code &&
+		   pc < (unsigned long) __end_atomic_asm_code) {
+		const struct exception_table_entry *fixup;
+#ifdef CONFIG_SMP
+		/* Unlock the atomic lock. */
+		int *lock_ptr = (int *)(regs->regs[ATOMIC_LOCK_REG]);
+		__atomic_fault_unlock(lock_ptr);
+#endif
+		fixup = search_exception_tables(pc);
+		if (!fixup)
+			ics_panic("ICS atomic fault not in table:"
+				  " PC %#lx, fault %d", pc, fault_num);
+		regs->pc = fixup->fixup;
+		regs->ex1 = PL_ICS_EX1(KERNEL_PL, 0);
+	}
+
+	/*
+	 * NOTE: the one other type of access that might bring us here
+	 * are the memory ops in __tns_atomic_acquire/__tns_atomic_release,
+	 * but we don't have to check specially for them since we can
+	 * always safely return to the address of the fault and retry,
+	 * since no separate atomic locks are involved.
+	 */
+
+	/*
+	 * Now that we have released the atomic lock (if necessary),
+	 * it's safe to spin if the PTE that caused the fault was migrating.
+	 */
+	if (fault_num == INT_DTLB_ACCESS)
+		write = 1;
+	if (handle_migrating_pte(pgd, fault_num, address, 1, write))
+		return state;
+
+	/* Return zero so that we continue on with normal fault handling. */
+	state.retval = 0;
+	return state;
+}
+
+#endif /* !__tilegx__ */
+
+/*
+ * This routine handles page faults.  It determines the address, and the
+ * problem, and then passes it handle_page_fault() for normal DTLB and
+ * ITLB issues, and for DMA or SN processor faults when we are in user
+ * space.  For the latter, if we're in kernel mode, we just save the
+ * interrupt away appropriately and return immediately.  We can't do
+ * page faults for user code while in kernel mode.
+ */
+void do_page_fault(struct pt_regs *regs, int fault_num,
+		   unsigned long address, unsigned long write)
+{
+	int is_page_fault;
+
+	/* This case should have been handled by do_page_fault_ics(). */
+	BUG_ON(write & ~1);
+
+#if CHIP_HAS_TILE_DMA()
+	/*
+	 * If it's a DMA fault, suspend the transfer while we're
+	 * handling the miss; we'll restart after it's handled.  If we
+	 * don't suspend, it's possible that this process could swap
+	 * out and back in, and restart the engine since the DMA is
+	 * still 'running'.
+	 */
+	if (fault_num == INT_DMATLB_MISS ||
+	    fault_num == INT_DMATLB_ACCESS ||
+	    fault_num == INT_DMATLB_MISS_DWNCL ||
+	    fault_num == INT_DMATLB_ACCESS_DWNCL) {
+		__insn_mtspr(SPR_DMA_CTR, SPR_DMA_CTR__SUSPEND_MASK);
+		while (__insn_mfspr(SPR_DMA_USER_STATUS) &
+		       SPR_DMA_STATUS__BUSY_MASK)
+			;
+	}
+#endif
+
+	/* Validate fault num and decide if this is a first-time page fault. */
+	switch (fault_num) {
+	case INT_ITLB_MISS:
+	case INT_DTLB_MISS:
+#if CHIP_HAS_TILE_DMA()
+	case INT_DMATLB_MISS:
+	case INT_DMATLB_MISS_DWNCL:
+#endif
+#if CHIP_HAS_SN_PROC()
+	case INT_SNITLB_MISS:
+	case INT_SNITLB_MISS_DWNCL:
+#endif
+		is_page_fault = 1;
+		break;
+
+	case INT_DTLB_ACCESS:
+#if CHIP_HAS_TILE_DMA()
+	case INT_DMATLB_ACCESS:
+	case INT_DMATLB_ACCESS_DWNCL:
+#endif
+		is_page_fault = 0;
+		break;
+
+	default:
+		panic("Bad fault number %d in do_page_fault", fault_num);
+	}
+
+	if (EX1_PL(regs->ex1) != USER_PL) {
+		struct async_tlb *async;
+		switch (fault_num) {
+#if CHIP_HAS_TILE_DMA()
+		case INT_DMATLB_MISS:
+		case INT_DMATLB_ACCESS:
+		case INT_DMATLB_MISS_DWNCL:
+		case INT_DMATLB_ACCESS_DWNCL:
+			async = &current->thread.dma_async_tlb;
+			break;
+#endif
+#if CHIP_HAS_SN_PROC()
+		case INT_SNITLB_MISS:
+		case INT_SNITLB_MISS_DWNCL:
+			async = &current->thread.sn_async_tlb;
+			break;
+#endif
+		default:
+			async = NULL;
+		}
+		if (async) {
+
+			/*
+			 * No vmalloc check required, so we can allow
+			 * interrupts immediately at this point.
+			 */
+			local_irq_enable();
+
+			set_thread_flag(TIF_ASYNC_TLB);
+			if (async->fault_num != 0) {
+				panic("Second async fault %d;"
+				      " old fault was %d (%#lx/%ld)",
+				      fault_num, async->fault_num,
+				      address, write);
+			}
+			BUG_ON(fault_num == 0);
+			async->fault_num = fault_num;
+			async->is_fault = is_page_fault;
+			async->is_write = write;
+			async->address = address;
+			return;
+		}
+	}
+
+	handle_page_fault(regs, fault_num, is_page_fault, address, write);
+}
+
+
+#if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC()
+/*
+ * Check an async_tlb structure to see if a deferred fault is waiting,
+ * and if so pass it to the page-fault code.
+ */
+static void handle_async_page_fault(struct pt_regs *regs,
+				    struct async_tlb *async)
+{
+	if (async->fault_num) {
+		/*
+		 * Clear async->fault_num before calling the page-fault
+		 * handler so that if we re-interrupt before returning
+		 * from the function we have somewhere to put the
+		 * information from the new interrupt.
+		 */
+		int fault_num = async->fault_num;
+		async->fault_num = 0;
+		handle_page_fault(regs, fault_num, async->is_fault,
+				  async->address, async->is_write);
+	}
+}
+#endif /* CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC() */
+
+
+/*
+ * This routine effectively re-issues asynchronous page faults
+ * when we are returning to user space.
+ */
+void do_async_page_fault(struct pt_regs *regs)
+{
+	/*
+	 * Clear thread flag early.  If we re-interrupt while processing
+	 * code here, we will reset it and recall this routine before
+	 * returning to user space.
+	 */
+	clear_thread_flag(TIF_ASYNC_TLB);
+
+#if CHIP_HAS_TILE_DMA()
+	handle_async_page_fault(regs, &current->thread.dma_async_tlb);
+#endif
+#if CHIP_HAS_SN_PROC()
+	handle_async_page_fault(regs, &current->thread.sn_async_tlb);
+#endif
+}
+
+void vmalloc_sync_all(void)
+{
+#ifdef __tilegx__
+	/* Currently all L1 kernel pmd's are static and shared. */
+	BUG_ON(pgd_index(VMALLOC_END) != pgd_index(VMALLOC_START));
+#else
+	/*
+	 * Note that races in the updates of insync and start aren't
+	 * problematic: insync can only get set bits added, and updates to
+	 * start are only improving performance (without affecting correctness
+	 * if undone).
+	 */
+	static DECLARE_BITMAP(insync, PTRS_PER_PGD);
+	static unsigned long start = PAGE_OFFSET;
+	unsigned long address;
+
+	BUILD_BUG_ON(PAGE_OFFSET & ~PGDIR_MASK);
+	for (address = start; address >= PAGE_OFFSET; address += PGDIR_SIZE) {
+		if (!test_bit(pgd_index(address), insync)) {
+			unsigned long flags;
+			struct list_head *pos;
+
+			spin_lock_irqsave(&pgd_lock, flags);
+			list_for_each(pos, &pgd_list)
+				if (!vmalloc_sync_one(list_to_pgd(pos),
+								address)) {
+					/* Must be at first entry in list. */
+					BUG_ON(pos != pgd_list.next);
+					break;
+				}
+			spin_unlock_irqrestore(&pgd_lock, flags);
+			if (pos != pgd_list.next)
+				set_bit(pgd_index(address), insync);
+		}
+		if (address == start && test_bit(pgd_index(address), insync))
+			start = address + PGDIR_SIZE;
+	}
+#endif
+}
diff --git a/arch/tile/mm/highmem.c b/arch/tile/mm/highmem.c
new file mode 100644
index 0000000..1fcecc5
--- /dev/null
+++ b/arch/tile/mm/highmem.c
@@ -0,0 +1,328 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#include <linux/highmem.h>
+#include <linux/module.h>
+#include <linux/pagemap.h>
+#include <asm/homecache.h>
+
+#define kmap_get_pte(vaddr) \
+	pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), (vaddr)),\
+		(vaddr)), (vaddr))
+
+
+void *kmap(struct page *page)
+{
+	void *kva;
+	unsigned long flags;
+	pte_t *ptep;
+
+	might_sleep();
+	if (!PageHighMem(page))
+		return page_address(page);
+	kva = kmap_high(page);
+
+	/*
+	 * Rewrite the PTE under the lock.  This ensures that the page
+	 * is not currently migrating.
+	 */
+	ptep = kmap_get_pte((unsigned long)kva);
+	flags = homecache_kpte_lock();
+	set_pte_at(&init_mm, kva, ptep, mk_pte(page, page_to_kpgprot(page)));
+	homecache_kpte_unlock(flags);
+
+	return kva;
+}
+EXPORT_SYMBOL(kmap);
+
+void kunmap(struct page *page)
+{
+	if (in_interrupt())
+		BUG();
+	if (!PageHighMem(page))
+		return;
+	kunmap_high(page);
+}
+EXPORT_SYMBOL(kunmap);
+
+static void debug_kmap_atomic_prot(enum km_type type)
+{
+#ifdef CONFIG_DEBUG_HIGHMEM
+	static unsigned warn_count = 10;
+
+	if (unlikely(warn_count == 0))
+		return;
+
+	if (unlikely(in_interrupt())) {
+		if (in_irq()) {
+			if (type != KM_IRQ0 && type != KM_IRQ1 &&
+			    type != KM_BIO_SRC_IRQ &&
+			    /* type != KM_BIO_DST_IRQ && */
+			    type != KM_BOUNCE_READ) {
+				WARN_ON(1);
+				warn_count--;
+			}
+		} else if (!irqs_disabled()) {	/* softirq */
+			if (type != KM_IRQ0 && type != KM_IRQ1 &&
+			    type != KM_SOFTIRQ0 && type != KM_SOFTIRQ1 &&
+			    type != KM_SKB_SUNRPC_DATA &&
+			    type != KM_SKB_DATA_SOFTIRQ &&
+			    type != KM_BOUNCE_READ) {
+				WARN_ON(1);
+				warn_count--;
+			}
+		}
+	}
+
+	if (type == KM_IRQ0 || type == KM_IRQ1 || type == KM_BOUNCE_READ ||
+	    type == KM_BIO_SRC_IRQ /* || type == KM_BIO_DST_IRQ */) {
+		if (!irqs_disabled()) {
+			WARN_ON(1);
+			warn_count--;
+		}
+	} else if (type == KM_SOFTIRQ0 || type == KM_SOFTIRQ1) {
+		if (irq_count() == 0 && !irqs_disabled()) {
+			WARN_ON(1);
+			warn_count--;
+		}
+	}
+#endif
+}
+
+/*
+ * Describe a single atomic mapping of a page on a given cpu at a
+ * given address, and allow it to be linked into a list.
+ */
+struct atomic_mapped_page {
+	struct list_head list;
+	struct page *page;
+	int cpu;
+	unsigned long va;
+};
+
+static spinlock_t amp_lock = __SPIN_LOCK_UNLOCKED(&amp_lock);
+static struct list_head amp_list = LIST_HEAD_INIT(amp_list);
+
+/*
+ * Combining this structure with a per-cpu declaration lets us give
+ * each cpu an atomic_mapped_page structure per type.
+ */
+struct kmap_amps {
+	struct atomic_mapped_page per_type[KM_TYPE_NR];
+};
+DEFINE_PER_CPU(struct kmap_amps, amps);
+
+/*
+ * Add a page and va, on this cpu, to the list of kmap_atomic pages,
+ * and write the new pte to memory.  Writing the new PTE under the
+ * lock guarantees that it is either on the list before migration starts
+ * (if we won the race), or set_pte() sets the migrating bit in the PTE
+ * (if we lost the race).  And doing it under the lock guarantees
+ * that when kmap_atomic_fix_one_pte() comes along, it finds a valid
+ * PTE in memory, iff the mapping is still on the amp_list.
+ *
+ * Finally, doing it under the lock lets us safely examine the page
+ * to see if it is immutable or not, for the generic kmap_atomic() case.
+ * If we examine it earlier we are exposed to a race where it looks
+ * writable earlier, but becomes immutable before we write the PTE.
+ */
+static void kmap_atomic_register(struct page *page, enum km_type type,
+				 unsigned long va, pte_t *ptep, pte_t pteval)
+{
+	unsigned long flags;
+	struct atomic_mapped_page *amp;
+
+	flags = homecache_kpte_lock();
+	spin_lock(&amp_lock);
+
+	/* With interrupts disabled, now fill in the per-cpu info. */
+	amp = &__get_cpu_var(amps).per_type[type];
+	amp->page = page;
+	amp->cpu = smp_processor_id();
+	amp->va = va;
+
+	/* For generic kmap_atomic(), choose the PTE writability now. */
+	if (!pte_read(pteval))
+		pteval = mk_pte(page, page_to_kpgprot(page));
+
+	list_add(&amp->list, &amp_list);
+	set_pte(ptep, pteval);
+	arch_flush_lazy_mmu_mode();
+
+	spin_unlock(&amp_lock);
+	homecache_kpte_unlock(flags);
+}
+
+/*
+ * Remove a page and va, on this cpu, from the list of kmap_atomic pages.
+ * Linear-time search, but we count on the lists being short.
+ * We don't need to adjust the PTE under the lock (as opposed to the
+ * kmap_atomic_register() case), since we're just unconditionally
+ * zeroing the PTE after it's off the list.
+ */
+static void kmap_atomic_unregister(struct page *page, unsigned long va)
+{
+	unsigned long flags;
+	struct atomic_mapped_page *amp;
+	int cpu = smp_processor_id();
+	spin_lock_irqsave(&amp_lock, flags);
+	list_for_each_entry(amp, &amp_list, list) {
+		if (amp->page == page && amp->cpu == cpu && amp->va == va)
+			break;
+	}
+	BUG_ON(&amp->list == &amp_list);
+	list_del(&amp->list);
+	spin_unlock_irqrestore(&amp_lock, flags);
+}
+
+/* Helper routine for kmap_atomic_fix_kpte(), below. */
+static void kmap_atomic_fix_one_kpte(struct atomic_mapped_page *amp,
+				     int finished)
+{
+	pte_t *ptep = kmap_get_pte(amp->va);
+	if (!finished) {
+		set_pte(ptep, pte_mkmigrate(*ptep));
+		flush_remote(0, 0, NULL, amp->va, PAGE_SIZE, PAGE_SIZE,
+			     cpumask_of(amp->cpu), NULL, 0);
+	} else {
+		/*
+		 * Rewrite a default kernel PTE for this page.
+		 * We rely on the fact that set_pte() writes the
+		 * present+migrating bits last.
+		 */
+		pte_t pte = mk_pte(amp->page, page_to_kpgprot(amp->page));
+		set_pte(ptep, pte);
+	}
+}
+
+/*
+ * This routine is a helper function for homecache_fix_kpte(); see
+ * its comments for more information on the "finished" argument here.
+ *
+ * Note that we hold the lock while doing the remote flushes, which
+ * will stall any unrelated cpus trying to do kmap_atomic operations.
+ * We could just update the PTEs under the lock, and save away copies
+ * of the structs (or just the va+cpu), then flush them after we
+ * release the lock, but it seems easier just to do it all under the lock.
+ */
+void kmap_atomic_fix_kpte(struct page *page, int finished)
+{
+	struct atomic_mapped_page *amp;
+	unsigned long flags;
+	spin_lock_irqsave(&amp_lock, flags);
+	list_for_each_entry(amp, &amp_list, list) {
+		if (amp->page == page)
+			kmap_atomic_fix_one_kpte(amp, finished);
+	}
+	spin_unlock_irqrestore(&amp_lock, flags);
+}
+
+/*
+ * kmap_atomic/kunmap_atomic is significantly faster than kmap/kunmap
+ * because the kmap code must perform a global TLB invalidation when
+ * the kmap pool wraps.
+ *
+ * Note that they may be slower than on x86 (etc.) because unlike on
+ * those platforms, we do have to take a global lock to map and unmap
+ * pages on Tile (see above).
+ *
+ * When holding an atomic kmap is is not legal to sleep, so atomic
+ * kmaps are appropriate for short, tight code paths only.
+ */
+void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot)
+{
+	enum fixed_addresses idx;
+	unsigned long vaddr;
+	pte_t *pte;
+
+	/* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
+	pagefault_disable();
+
+	/* Avoid icache flushes by disallowing atomic executable mappings. */
+	BUG_ON(pte_exec(prot));
+
+	if (!PageHighMem(page))
+		return page_address(page);
+
+	debug_kmap_atomic_prot(type);
+
+	idx = type + KM_TYPE_NR*smp_processor_id();
+	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+	pte = kmap_get_pte(vaddr);
+	BUG_ON(!pte_none(*pte));
+
+	/* Register that this page is mapped atomically on this cpu. */
+	kmap_atomic_register(page, type, vaddr, pte, mk_pte(page, prot));
+
+	return (void *)vaddr;
+}
+EXPORT_SYMBOL(kmap_atomic_prot);
+
+void *kmap_atomic(struct page *page, enum km_type type)
+{
+	/* PAGE_NONE is a magic value that tells us to check immutability. */
+	return kmap_atomic_prot(page, type, PAGE_NONE);
+}
+EXPORT_SYMBOL(kmap_atomic);
+
+void kunmap_atomic(void *kvaddr, enum km_type type)
+{
+	unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
+	enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
+
+	/*
+	 * Force other mappings to Oops if they try to access this pte without
+	 * first remapping it.  Keeping stale mappings around is a bad idea.
+	 */
+	if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx)) {
+		pte_t *pte = kmap_get_pte(vaddr);
+		pte_t pteval = *pte;
+		BUG_ON(!pte_present(pteval) && !pte_migrating(pteval));
+		kmap_atomic_unregister(pte_page(pteval), vaddr);
+		kpte_clear_flush(pte, vaddr);
+	} else {
+		/* Must be a lowmem page */
+		BUG_ON(vaddr < PAGE_OFFSET);
+		BUG_ON(vaddr >= (unsigned long)high_memory);
+	}
+
+	arch_flush_lazy_mmu_mode();
+	pagefault_enable();
+}
+EXPORT_SYMBOL(kunmap_atomic);
+
+/*
+ * This API is supposed to allow us to map memory without a "struct page".
+ * Currently we don't support this, though this may change in the future.
+ */
+void *kmap_atomic_pfn(unsigned long pfn, enum km_type type)
+{
+	return kmap_atomic(pfn_to_page(pfn), type);
+}
+void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot)
+{
+	return kmap_atomic_prot(pfn_to_page(pfn), type, prot);
+}
+
+struct page *kmap_atomic_to_page(void *ptr)
+{
+	pte_t *pte;
+	unsigned long vaddr = (unsigned long)ptr;
+
+	if (vaddr < FIXADDR_START)
+		return virt_to_page(ptr);
+
+	pte = kmap_get_pte(vaddr);
+	return pte_page(*pte);
+}
diff --git a/arch/tile/mm/homecache.c b/arch/tile/mm/homecache.c
new file mode 100644
index 0000000..52feb77
--- /dev/null
+++ b/arch/tile/mm/homecache.c
@@ -0,0 +1,445 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ * This code maintains the "home" for each page in the system.
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/spinlock.h>
+#include <linux/list.h>
+#include <linux/bootmem.h>
+#include <linux/rmap.h>
+#include <linux/pagemap.h>
+#include <linux/mutex.h>
+#include <linux/interrupt.h>
+#include <linux/sysctl.h>
+#include <linux/pagevec.h>
+#include <linux/ptrace.h>
+#include <linux/timex.h>
+#include <linux/cache.h>
+#include <linux/smp.h>
+
+#include <asm/page.h>
+#include <asm/sections.h>
+#include <asm/tlbflush.h>
+#include <asm/pgalloc.h>
+#include <asm/homecache.h>
+
+#include "migrate.h"
+
+
+#if CHIP_HAS_COHERENT_LOCAL_CACHE()
+
+/*
+ * The noallocl2 option suppresses all use of the L2 cache to cache
+ * locally from a remote home.  There's no point in using it if we
+ * don't have coherent local caching, though.
+ */
+int __write_once noallocl2;
+static int __init set_noallocl2(char *str)
+{
+	noallocl2 = 1;
+	return 0;
+}
+early_param("noallocl2", set_noallocl2);
+
+#else
+
+#define noallocl2 0
+
+#endif
+
+
+
+/* Provide no-op versions of these routines to keep flush_remote() cleaner. */
+#define mark_caches_evicted_start() 0
+#define mark_caches_evicted_finish(mask, timestamp) do {} while (0)
+
+
+
+
+/*
+ * Update the irq_stat for cpus that we are going to interrupt
+ * with TLB or cache flushes.  Also handle removing dataplane cpus
+ * from the TLB flush set, and setting dataplane_tlb_state instead.
+ */
+static void hv_flush_update(const struct cpumask *cache_cpumask,
+			    struct cpumask *tlb_cpumask,
+			    unsigned long tlb_va, unsigned long tlb_length,
+			    HV_Remote_ASID *asids, int asidcount)
+{
+	struct cpumask mask;
+	int i, cpu;
+
+	cpumask_clear(&mask);
+	if (cache_cpumask)
+		cpumask_or(&mask, &mask, cache_cpumask);
+	if (tlb_cpumask && tlb_length) {
+		cpumask_or(&mask, &mask, tlb_cpumask);
+	}
+
+	for (i = 0; i < asidcount; ++i)
+		cpumask_set_cpu(asids[i].y * smp_width + asids[i].x, &mask);
+
+	/*
+	 * Don't bother to update atomically; losing a count
+	 * here is not that critical.
+	 */
+	for_each_cpu(cpu, &mask)
+		++per_cpu(irq_stat, cpu).irq_hv_flush_count;
+}
+
+/*
+ * This wrapper function around hv_flush_remote() does several things:
+ *
+ *  - Provides a return value error-checking panic path, since
+ *    there's never any good reason for hv_flush_remote() to fail.
+ *  - Accepts a 32-bit PFN rather than a 64-bit PA, which generally
+ *    is the type that Linux wants to pass around anyway.
+ *  - Centralizes the mark_caches_evicted() handling.
+ *  - Canonicalizes that lengths of zero make cpumasks NULL.
+ *  - Handles deferring TLB flushes for dataplane tiles.
+ *  - Tracks remote interrupts in the per-cpu irq_cpustat_t.
+ *
+ * Note that we have to wait until the cache flush completes before
+ * updating the per-cpu last_cache_flush word, since otherwise another
+ * concurrent flush can race, conclude the flush has already
+ * completed, and start to use the page while it's still dirty
+ * remotely (running concurrently with the actual evict, presumably).
+ */
+void flush_remote(unsigned long cache_pfn, unsigned long cache_control,
+		  const struct cpumask *cache_cpumask_orig,
+		  HV_VirtAddr tlb_va, unsigned long tlb_length,
+		  unsigned long tlb_pgsize,
+		  const struct cpumask *tlb_cpumask_orig,
+		  HV_Remote_ASID *asids, int asidcount)
+{
+	int rc;
+	int timestamp = 0;  /* happy compiler */
+	struct cpumask cache_cpumask_copy, tlb_cpumask_copy;
+	struct cpumask *cache_cpumask, *tlb_cpumask;
+	HV_PhysAddr cache_pa;
+	char cache_buf[NR_CPUS*5], tlb_buf[NR_CPUS*5];
+
+	mb();   /* provided just to simplify "magic hypervisor" mode */
+
+	/*
+	 * Canonicalize and copy the cpumasks.
+	 */
+	if (cache_cpumask_orig && cache_control) {
+		cpumask_copy(&cache_cpumask_copy, cache_cpumask_orig);
+		cache_cpumask = &cache_cpumask_copy;
+	} else {
+		cpumask_clear(&cache_cpumask_copy);
+		cache_cpumask = NULL;
+	}
+	if (cache_cpumask == NULL)
+		cache_control = 0;
+	if (tlb_cpumask_orig && tlb_length) {
+		cpumask_copy(&tlb_cpumask_copy, tlb_cpumask_orig);
+		tlb_cpumask = &tlb_cpumask_copy;
+	} else {
+		cpumask_clear(&tlb_cpumask_copy);
+		tlb_cpumask = NULL;
+	}
+
+	hv_flush_update(cache_cpumask, tlb_cpumask, tlb_va, tlb_length,
+			asids, asidcount);
+	cache_pa = (HV_PhysAddr)cache_pfn << PAGE_SHIFT;
+	if (cache_control & HV_FLUSH_EVICT_L2)
+		timestamp = mark_caches_evicted_start();
+	rc = hv_flush_remote(cache_pa, cache_control,
+			     cpumask_bits(cache_cpumask),
+			     tlb_va, tlb_length, tlb_pgsize,
+			     cpumask_bits(tlb_cpumask),
+			     asids, asidcount);
+	if (cache_control & HV_FLUSH_EVICT_L2)
+		mark_caches_evicted_finish(cache_cpumask, timestamp);
+	if (rc == 0)
+		return;
+	cpumask_scnprintf(cache_buf, sizeof(cache_buf), &cache_cpumask_copy);
+	cpumask_scnprintf(tlb_buf, sizeof(tlb_buf), &tlb_cpumask_copy);
+
+	printk("hv_flush_remote(%#llx, %#lx, %p [%s],"
+	       " %#lx, %#lx, %#lx, %p [%s], %p, %d) = %d\n",
+	       cache_pa, cache_control, cache_cpumask, cache_buf,
+	       (unsigned long)tlb_va, tlb_length, tlb_pgsize,
+	       tlb_cpumask, tlb_buf,
+	       asids, asidcount, rc);
+	if (asidcount > 0) {
+		int i;
+		printk(" asids:");
+		for (i = 0; i < asidcount; ++i)
+			printk(" %d,%d,%d",
+			       asids[i].x, asids[i].y, asids[i].asid);
+		printk("\n");
+	}
+	panic("Unsafe to continue.");
+}
+
+void homecache_evict(const struct cpumask *mask)
+{
+	flush_remote(0, HV_FLUSH_EVICT_L2, mask, 0, 0, 0, NULL, NULL, 0);
+}
+
+/* Return a mask of the cpus whose caches currently own these pages. */
+static void homecache_mask(struct page *page, int pages,
+			   struct cpumask *home_mask)
+{
+	int i;
+	cpumask_clear(home_mask);
+	for (i = 0; i < pages; ++i) {
+		int home = page_home(&page[i]);
+		if (home == PAGE_HOME_IMMUTABLE ||
+		    home == PAGE_HOME_INCOHERENT) {
+			cpumask_copy(home_mask, cpu_possible_mask);
+			return;
+		}
+#if CHIP_HAS_CBOX_HOME_MAP()
+		if (home == PAGE_HOME_HASH) {
+			cpumask_or(home_mask, home_mask, &hash_for_home_map);
+			continue;
+		}
+#endif
+		if (home == PAGE_HOME_UNCACHED)
+			continue;
+		BUG_ON(home < 0 || home >= NR_CPUS);
+		cpumask_set_cpu(home, home_mask);
+	}
+}
+
+/*
+ * Return the passed length, or zero if it's long enough that we
+ * believe we should evict the whole L2 cache.
+ */
+static unsigned long cache_flush_length(unsigned long length)
+{
+	return (length >= CHIP_L2_CACHE_SIZE()) ? HV_FLUSH_EVICT_L2 : length;
+}
+
+/* On the simulator, confirm lines have been evicted everywhere. */
+static void validate_lines_evicted(unsigned long pfn, size_t length)
+{
+	sim_syscall(SIM_SYSCALL_VALIDATE_LINES_EVICTED,
+		    (HV_PhysAddr)pfn << PAGE_SHIFT, length);
+}
+
+/* Flush a page out of whatever cache(s) it is in. */
+void homecache_flush_cache(struct page *page, int order)
+{
+	int pages = 1 << order;
+	int length = cache_flush_length(pages * PAGE_SIZE);
+	unsigned long pfn = page_to_pfn(page);
+	struct cpumask home_mask;
+
+	homecache_mask(page, pages, &home_mask);
+	flush_remote(pfn, length, &home_mask, 0, 0, 0, NULL, NULL, 0);
+	validate_lines_evicted(pfn, pages * PAGE_SIZE);
+}
+
+
+/* Report the home corresponding to a given PTE. */
+static int pte_to_home(pte_t pte)
+{
+	if (hv_pte_get_nc(pte))
+		return PAGE_HOME_IMMUTABLE;
+	switch (hv_pte_get_mode(pte)) {
+	case HV_PTE_MODE_CACHE_TILE_L3:
+		return get_remote_cache_cpu(pte);
+	case HV_PTE_MODE_CACHE_NO_L3:
+		return PAGE_HOME_INCOHERENT;
+	case HV_PTE_MODE_UNCACHED:
+		return PAGE_HOME_UNCACHED;
+#if CHIP_HAS_CBOX_HOME_MAP()
+	case HV_PTE_MODE_CACHE_HASH_L3:
+		return PAGE_HOME_HASH;
+#endif
+	}
+	panic("Bad PTE %#llx\n", pte.val);
+}
+
+/* Update the home of a PTE if necessary (can also be used for a pgprot_t). */
+pte_t pte_set_home(pte_t pte, int home)
+{
+	/* Check for non-linear file mapping "PTEs" and pass them through. */
+	if (pte_file(pte))
+		return pte;
+
+#if CHIP_HAS_MMIO()
+	/* Check for MMIO mappings and pass them through. */
+	if (hv_pte_get_mode(pte) == HV_PTE_MODE_MMIO)
+		return pte;
+#endif
+
+
+	/*
+	 * Only immutable pages get NC mappings.  If we have a
+	 * non-coherent PTE, but the underlying page is not
+	 * immutable, it's likely the result of a forced
+	 * caching setting running up against ptrace setting
+	 * the page to be writable underneath.  In this case,
+	 * just keep the PTE coherent.
+	 */
+	if (hv_pte_get_nc(pte) && home != PAGE_HOME_IMMUTABLE) {
+		pte = hv_pte_clear_nc(pte);
+		printk("non-immutable page incoherently referenced: %#llx\n",
+		       pte.val);
+	}
+
+	switch (home) {
+
+	case PAGE_HOME_UNCACHED:
+		pte = hv_pte_set_mode(pte, HV_PTE_MODE_UNCACHED);
+		break;
+
+	case PAGE_HOME_INCOHERENT:
+		pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_NO_L3);
+		break;
+
+	case PAGE_HOME_IMMUTABLE:
+		/*
+		 * We could home this page anywhere, since it's immutable,
+		 * but by default just home it to follow "hash_default".
+		 */
+		BUG_ON(hv_pte_get_writable(pte));
+		if (pte_get_forcecache(pte)) {
+			/* Upgrade "force any cpu" to "No L3" for immutable. */
+			if (hv_pte_get_mode(pte) == HV_PTE_MODE_CACHE_TILE_L3
+			    && pte_get_anyhome(pte)) {
+				pte = hv_pte_set_mode(pte,
+						      HV_PTE_MODE_CACHE_NO_L3);
+			}
+		} else
+#if CHIP_HAS_CBOX_HOME_MAP()
+		if (hash_default)
+			pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_HASH_L3);
+		else
+#endif
+			pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_NO_L3);
+		pte = hv_pte_set_nc(pte);
+		break;
+
+#if CHIP_HAS_CBOX_HOME_MAP()
+	case PAGE_HOME_HASH:
+		pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_HASH_L3);
+		break;
+#endif
+
+	default:
+		BUG_ON(home < 0 || home >= NR_CPUS ||
+		       !cpu_is_valid_lotar(home));
+		pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_TILE_L3);
+		pte = set_remote_cache_cpu(pte, home);
+		break;
+	}
+
+#if CHIP_HAS_NC_AND_NOALLOC_BITS()
+	if (noallocl2)
+		pte = hv_pte_set_no_alloc_l2(pte);
+
+	/* Simplify "no local and no l3" to "uncached" */
+	if (hv_pte_get_no_alloc_l2(pte) && hv_pte_get_no_alloc_l1(pte) &&
+	    hv_pte_get_mode(pte) == HV_PTE_MODE_CACHE_NO_L3) {
+		pte = hv_pte_set_mode(pte, HV_PTE_MODE_UNCACHED);
+	}
+#endif
+
+	/* Checking this case here gives a better panic than from the hv. */
+	BUG_ON(hv_pte_get_mode(pte) == 0);
+
+	return pte;
+}
+
+/*
+ * The routines in this section are the "static" versions of the normal
+ * dynamic homecaching routines; they just set the home cache
+ * of a kernel page once, and require a full-chip cache/TLB flush,
+ * so they're not suitable for anything but infrequent use.
+ */
+
+#if CHIP_HAS_CBOX_HOME_MAP()
+static inline int initial_page_home(void) { return PAGE_HOME_HASH; }
+#else
+static inline int initial_page_home(void) { return 0; }
+#endif
+
+int page_home(struct page *page)
+{
+	if (PageHighMem(page)) {
+		return initial_page_home();
+	} else {
+		unsigned long kva = (unsigned long)page_address(page);
+		return pte_to_home(*virt_to_pte(NULL, kva));
+	}
+}
+
+void homecache_change_page_home(struct page *page, int order, int home)
+{
+	int i, pages = (1 << order);
+	unsigned long kva;
+
+	BUG_ON(PageHighMem(page));
+	BUG_ON(page_count(page) > 1);
+	BUG_ON(page_mapcount(page) != 0);
+	kva = (unsigned long) page_address(page);
+	flush_remote(0, HV_FLUSH_EVICT_L2, &cpu_cacheable_map,
+		     kva, pages * PAGE_SIZE, PAGE_SIZE, cpu_online_mask,
+		     NULL, 0);
+
+	for (i = 0; i < pages; ++i, kva += PAGE_SIZE) {
+		pte_t *ptep = virt_to_pte(NULL, kva);
+		pte_t pteval = *ptep;
+		BUG_ON(!pte_present(pteval) || pte_huge(pteval));
+		*ptep = pte_set_home(pteval, home);
+	}
+}
+
+struct page *homecache_alloc_pages(gfp_t gfp_mask,
+				   unsigned int order, int home)
+{
+	struct page *page;
+	BUG_ON(gfp_mask & __GFP_HIGHMEM);   /* must be lowmem */
+	page = alloc_pages(gfp_mask, order);
+	if (page)
+		homecache_change_page_home(page, order, home);
+	return page;
+}
+
+struct page *homecache_alloc_pages_node(int nid, gfp_t gfp_mask,
+					unsigned int order, int home)
+{
+	struct page *page;
+	BUG_ON(gfp_mask & __GFP_HIGHMEM);   /* must be lowmem */
+	page = alloc_pages_node(nid, gfp_mask, order);
+	if (page)
+		homecache_change_page_home(page, order, home);
+	return page;
+}
+
+void homecache_free_pages(unsigned long addr, unsigned int order)
+{
+	struct page *page;
+
+	if (addr == 0)
+		return;
+
+	VM_BUG_ON(!virt_addr_valid((void *)addr));
+	page = virt_to_page((void *)addr);
+	if (put_page_testzero(page)) {
+		int pages = (1 << order);
+		homecache_change_page_home(page, order, initial_page_home());
+		while (pages--)
+			__free_page(page++);
+	}
+}
diff --git a/arch/tile/mm/hugetlbpage.c b/arch/tile/mm/hugetlbpage.c
new file mode 100644
index 0000000..c38570f
--- /dev/null
+++ b/arch/tile/mm/hugetlbpage.c
@@ -0,0 +1,343 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ * TILE Huge TLB Page Support for Kernel.
+ * Taken from i386 hugetlb implementation:
+ * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
+ */
+
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/pagemap.h>
+#include <linux/smp_lock.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/sysctl.h>
+#include <linux/mman.h>
+#include <asm/tlb.h>
+#include <asm/tlbflush.h>
+
+pte_t *huge_pte_alloc(struct mm_struct *mm,
+		      unsigned long addr, unsigned long sz)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pte_t *pte = NULL;
+
+	/* We do not yet support multiple huge page sizes. */
+	BUG_ON(sz != PMD_SIZE);
+
+	pgd = pgd_offset(mm, addr);
+	pud = pud_alloc(mm, pgd, addr);
+	if (pud)
+		pte = (pte_t *) pmd_alloc(mm, pud, addr);
+	BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte));
+
+	return pte;
+}
+
+pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd = NULL;
+
+	pgd = pgd_offset(mm, addr);
+	if (pgd_present(*pgd)) {
+		pud = pud_offset(pgd, addr);
+		if (pud_present(*pud))
+			pmd = pmd_offset(pud, addr);
+	}
+	return (pte_t *) pmd;
+}
+
+#ifdef HUGETLB_TEST
+struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
+			      int write)
+{
+	unsigned long start = address;
+	int length = 1;
+	int nr;
+	struct page *page;
+	struct vm_area_struct *vma;
+
+	vma = find_vma(mm, addr);
+	if (!vma || !is_vm_hugetlb_page(vma))
+		return ERR_PTR(-EINVAL);
+
+	pte = huge_pte_offset(mm, address);
+
+	/* hugetlb should be locked, and hence, prefaulted */
+	WARN_ON(!pte || pte_none(*pte));
+
+	page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)];
+
+	WARN_ON(!PageHead(page));
+
+	return page;
+}
+
+int pmd_huge(pmd_t pmd)
+{
+	return 0;
+}
+
+int pud_huge(pud_t pud)
+{
+	return 0;
+}
+
+struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
+			     pmd_t *pmd, int write)
+{
+	return NULL;
+}
+
+#else
+
+struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
+			      int write)
+{
+	return ERR_PTR(-EINVAL);
+}
+
+int pmd_huge(pmd_t pmd)
+{
+	return !!(pmd_val(pmd) & _PAGE_HUGE_PAGE);
+}
+
+int pud_huge(pud_t pud)
+{
+	return !!(pud_val(pud) & _PAGE_HUGE_PAGE);
+}
+
+struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
+			     pmd_t *pmd, int write)
+{
+	struct page *page;
+
+	page = pte_page(*(pte_t *)pmd);
+	if (page)
+		page += ((address & ~PMD_MASK) >> PAGE_SHIFT);
+	return page;
+}
+
+struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address,
+			     pud_t *pud, int write)
+{
+	struct page *page;
+
+	page = pte_page(*(pte_t *)pud);
+	if (page)
+		page += ((address & ~PUD_MASK) >> PAGE_SHIFT);
+	return page;
+}
+
+int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
+{
+	return 0;
+}
+
+#endif
+
+#ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
+static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
+		unsigned long addr, unsigned long len,
+		unsigned long pgoff, unsigned long flags)
+{
+	struct hstate *h = hstate_file(file);
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	unsigned long start_addr;
+
+	if (len > mm->cached_hole_size) {
+		start_addr = mm->free_area_cache;
+	} else {
+		start_addr = TASK_UNMAPPED_BASE;
+		mm->cached_hole_size = 0;
+	}
+
+full_search:
+	addr = ALIGN(start_addr, huge_page_size(h));
+
+	for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
+		/* At this point:  (!vma || addr < vma->vm_end). */
+		if (TASK_SIZE - len < addr) {
+			/*
+			 * Start a new search - just in case we missed
+			 * some holes.
+			 */
+			if (start_addr != TASK_UNMAPPED_BASE) {
+				start_addr = TASK_UNMAPPED_BASE;
+				mm->cached_hole_size = 0;
+				goto full_search;
+			}
+			return -ENOMEM;
+		}
+		if (!vma || addr + len <= vma->vm_start) {
+			mm->free_area_cache = addr + len;
+			return addr;
+		}
+		if (addr + mm->cached_hole_size < vma->vm_start)
+			mm->cached_hole_size = vma->vm_start - addr;
+		addr = ALIGN(vma->vm_end, huge_page_size(h));
+	}
+}
+
+static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
+		unsigned long addr0, unsigned long len,
+		unsigned long pgoff, unsigned long flags)
+{
+	struct hstate *h = hstate_file(file);
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma, *prev_vma;
+	unsigned long base = mm->mmap_base, addr = addr0;
+	unsigned long largest_hole = mm->cached_hole_size;
+	int first_time = 1;
+
+	/* don't allow allocations above current base */
+	if (mm->free_area_cache > base)
+		mm->free_area_cache = base;
+
+	if (len <= largest_hole) {
+		largest_hole = 0;
+		mm->free_area_cache  = base;
+	}
+try_again:
+	/* make sure it can fit in the remaining address space */
+	if (mm->free_area_cache < len)
+		goto fail;
+
+	/* either no address requested or cant fit in requested address hole */
+	addr = (mm->free_area_cache - len) & huge_page_mask(h);
+	do {
+		/*
+		 * Lookup failure means no vma is above this address,
+		 * i.e. return with success:
+		 */
+		vma = find_vma_prev(mm, addr, &prev_vma);
+		if (!vma) {
+			return addr;
+			break;
+		}
+
+		/*
+		 * new region fits between prev_vma->vm_end and
+		 * vma->vm_start, use it:
+		 */
+		if (addr + len <= vma->vm_start &&
+			    (!prev_vma || (addr >= prev_vma->vm_end))) {
+			/* remember the address as a hint for next time */
+			mm->cached_hole_size = largest_hole;
+			mm->free_area_cache = addr;
+			return addr;
+		} else {
+			/* pull free_area_cache down to the first hole */
+			if (mm->free_area_cache == vma->vm_end) {
+				mm->free_area_cache = vma->vm_start;
+				mm->cached_hole_size = largest_hole;
+			}
+		}
+
+		/* remember the largest hole we saw so far */
+		if (addr + largest_hole < vma->vm_start)
+			largest_hole = vma->vm_start - addr;
+
+		/* try just below the current vma->vm_start */
+		addr = (vma->vm_start - len) & huge_page_mask(h);
+
+	} while (len <= vma->vm_start);
+
+fail:
+	/*
+	 * if hint left us with no space for the requested
+	 * mapping then try again:
+	 */
+	if (first_time) {
+		mm->free_area_cache = base;
+		largest_hole = 0;
+		first_time = 0;
+		goto try_again;
+	}
+	/*
+	 * A failed mmap() very likely causes application failure,
+	 * so fall back to the bottom-up function here. This scenario
+	 * can happen with large stack limits and large mmap()
+	 * allocations.
+	 */
+	mm->free_area_cache = TASK_UNMAPPED_BASE;
+	mm->cached_hole_size = ~0UL;
+	addr = hugetlb_get_unmapped_area_bottomup(file, addr0,
+			len, pgoff, flags);
+
+	/*
+	 * Restore the topdown base:
+	 */
+	mm->free_area_cache = base;
+	mm->cached_hole_size = ~0UL;
+
+	return addr;
+}
+
+unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
+		unsigned long len, unsigned long pgoff, unsigned long flags)
+{
+	struct hstate *h = hstate_file(file);
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+
+	if (len & ~huge_page_mask(h))
+		return -EINVAL;
+	if (len > TASK_SIZE)
+		return -ENOMEM;
+
+	if (flags & MAP_FIXED) {
+		if (prepare_hugepage_range(file, addr, len))
+			return -EINVAL;
+		return addr;
+	}
+
+	if (addr) {
+		addr = ALIGN(addr, huge_page_size(h));
+		vma = find_vma(mm, addr);
+		if (TASK_SIZE - len >= addr &&
+		    (!vma || addr + len <= vma->vm_start))
+			return addr;
+	}
+	if (current->mm->get_unmapped_area == arch_get_unmapped_area)
+		return hugetlb_get_unmapped_area_bottomup(file, addr, len,
+				pgoff, flags);
+	else
+		return hugetlb_get_unmapped_area_topdown(file, addr, len,
+				pgoff, flags);
+}
+
+static __init int setup_hugepagesz(char *opt)
+{
+	unsigned long ps = memparse(opt, &opt);
+	if (ps == PMD_SIZE) {
+		hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
+	} else if (ps == PUD_SIZE) {
+		hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
+	} else {
+		printk(KERN_ERR "hugepagesz: Unsupported page size %lu M\n",
+			ps >> 20);
+		return 0;
+	}
+	return 1;
+}
+__setup("hugepagesz=", setup_hugepagesz);
+
+#endif /*HAVE_ARCH_HUGETLB_UNMAPPED_AREA*/
diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c
new file mode 100644
index 0000000..125ac53
--- /dev/null
+++ b/arch/tile/mm/init.c
@@ -0,0 +1,1082 @@
+/*
+ * Copyright (C) 1995  Linus Torvalds
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#include <linux/module.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/swap.h>
+#include <linux/smp.h>
+#include <linux/init.h>
+#include <linux/highmem.h>
+#include <linux/pagemap.h>
+#include <linux/poison.h>
+#include <linux/bootmem.h>
+#include <linux/slab.h>
+#include <linux/proc_fs.h>
+#include <linux/efi.h>
+#include <linux/memory_hotplug.h>
+#include <linux/uaccess.h>
+#include <asm/mmu_context.h>
+#include <asm/processor.h>
+#include <asm/system.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/dma.h>
+#include <asm/fixmap.h>
+#include <asm/tlb.h>
+#include <asm/tlbflush.h>
+#include <asm/sections.h>
+#include <asm/setup.h>
+#include <asm/homecache.h>
+#include <hv/hypervisor.h>
+#include <arch/chip.h>
+
+#include "migrate.h"
+
+/*
+ * We could set FORCE_MAX_ZONEORDER to "(HPAGE_SHIFT - PAGE_SHIFT + 1)"
+ * in the Tile Kconfig, but this generates configure warnings.
+ * Do it here and force people to get it right to compile this file.
+ * The problem is that with 4KB small pages and 16MB huge pages,
+ * the default value doesn't allow us to group enough small pages
+ * together to make up a huge page.
+ */
+#if CONFIG_FORCE_MAX_ZONEORDER < HPAGE_SHIFT - PAGE_SHIFT + 1
+# error "Change FORCE_MAX_ZONEORDER in arch/tile/Kconfig to match page size"
+#endif
+
+#define clear_pgd(pmdptr) (*(pmdptr) = hv_pte(0))
+
+unsigned long VMALLOC_RESERVE = CONFIG_VMALLOC_RESERVE;
+
+DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
+
+/* Create an L2 page table */
+static pte_t * __init alloc_pte(void)
+{
+	return __alloc_bootmem(L2_KERNEL_PGTABLE_SIZE, HV_PAGE_TABLE_ALIGN, 0);
+}
+
+/*
+ * L2 page tables per controller.  We allocate these all at once from
+ * the bootmem allocator and store them here.  This saves on kernel L2
+ * page table memory, compared to allocating a full 64K page per L2
+ * page table, and also means that in cases where we use huge pages,
+ * we are guaranteed to later be able to shatter those huge pages and
+ * switch to using these page tables instead, without requiring
+ * further allocation.  Each l2_ptes[] entry points to the first page
+ * table for the first hugepage-size piece of memory on the
+ * controller; other page tables are just indexed directly, i.e. the
+ * L2 page tables are contiguous in memory for each controller.
+ */
+static pte_t *l2_ptes[MAX_NUMNODES];
+static int num_l2_ptes[MAX_NUMNODES];
+
+static void init_prealloc_ptes(int node, int pages)
+{
+	BUG_ON(pages & (HV_L2_ENTRIES-1));
+	if (pages) {
+		num_l2_ptes[node] = pages;
+		l2_ptes[node] = __alloc_bootmem(pages * sizeof(pte_t),
+						HV_PAGE_TABLE_ALIGN, 0);
+	}
+}
+
+pte_t *get_prealloc_pte(unsigned long pfn)
+{
+	int node = pfn_to_nid(pfn);
+	pfn &= ~(-1UL << (NR_PA_HIGHBIT_SHIFT - PAGE_SHIFT));
+	BUG_ON(node >= MAX_NUMNODES);
+	BUG_ON(pfn >= num_l2_ptes[node]);
+	return &l2_ptes[node][pfn];
+}
+
+/*
+ * What caching do we expect pages from the heap to have when
+ * they are allocated during bootup?  (Once we've installed the
+ * "real" swapper_pg_dir.)
+ */
+static int initial_heap_home(void)
+{
+#if CHIP_HAS_CBOX_HOME_MAP()
+	if (hash_default)
+		return PAGE_HOME_HASH;
+#endif
+	return smp_processor_id();
+}
+
+/*
+ * Place a pointer to an L2 page table in a middle page
+ * directory entry.
+ */
+static void __init assign_pte(pmd_t *pmd, pte_t *page_table)
+{
+	phys_addr_t pa = __pa(page_table);
+	unsigned long l2_ptfn = pa >> HV_LOG2_PAGE_TABLE_ALIGN;
+	pte_t pteval = hv_pte_set_ptfn(__pgprot(_PAGE_TABLE), l2_ptfn);
+	BUG_ON((pa & (HV_PAGE_TABLE_ALIGN-1)) != 0);
+	pteval = pte_set_home(pteval, initial_heap_home());
+	*(pte_t *)pmd = pteval;
+	if (page_table != (pte_t *)pmd_page_vaddr(*pmd))
+		BUG();
+}
+
+#ifdef __tilegx__
+
+#if HV_L1_SIZE != HV_L2_SIZE
+# error Rework assumption that L1 and L2 page tables are same size.
+#endif
+
+/* Since pmd_t arrays and pte_t arrays are the same size, just use casts. */
+static inline pmd_t *alloc_pmd(void)
+{
+	return (pmd_t *)alloc_pte();
+}
+
+static inline void assign_pmd(pud_t *pud, pmd_t *pmd)
+{
+	assign_pte((pmd_t *)pud, (pte_t *)pmd);
+}
+
+#endif /* __tilegx__ */
+
+/* Replace the given pmd with a full PTE table. */
+void __init shatter_pmd(pmd_t *pmd)
+{
+	pte_t *pte = get_prealloc_pte(pte_pfn(*(pte_t *)pmd));
+	assign_pte(pmd, pte);
+}
+
+#ifdef CONFIG_HIGHMEM
+/*
+ * This function initializes a certain range of kernel virtual memory
+ * with new bootmem page tables, everywhere page tables are missing in
+ * the given range.
+ */
+
+/*
+ * NOTE: The pagetables are allocated contiguous on the physical space
+ * so we can cache the place of the first one and move around without
+ * checking the pgd every time.
+ */
+static void __init page_table_range_init(unsigned long start,
+					 unsigned long end, pgd_t *pgd_base)
+{
+	pgd_t *pgd;
+	int pgd_idx;
+	unsigned long vaddr;
+
+	vaddr = start;
+	pgd_idx = pgd_index(vaddr);
+	pgd = pgd_base + pgd_idx;
+
+	for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) {
+		pmd_t *pmd = pmd_offset(pud_offset(pgd, vaddr), vaddr);
+		if (pmd_none(*pmd))
+			assign_pte(pmd, alloc_pte());
+		vaddr += PMD_SIZE;
+	}
+}
+#endif /* CONFIG_HIGHMEM */
+
+
+#if CHIP_HAS_CBOX_HOME_MAP()
+
+static int __initdata ktext_hash = 1;  /* .text pages */
+static int __initdata kdata_hash = 1;  /* .data and .bss pages */
+int __write_once hash_default = 1;     /* kernel allocator pages */
+EXPORT_SYMBOL(hash_default);
+int __write_once kstack_hash = 1;      /* if no homecaching, use h4h */
+#endif /* CHIP_HAS_CBOX_HOME_MAP */
+
+/*
+ * CPUs to use to for striping the pages of kernel data.  If hash-for-home
+ * is available, this is only relevant if kcache_hash sets up the
+ * .data and .bss to be page-homed, and we don't want the default mode
+ * of using the full set of kernel cpus for the striping.
+ */
+static __initdata struct cpumask kdata_mask;
+static __initdata int kdata_arg_seen;
+
+int __write_once kdata_huge;       /* if no homecaching, small pages */
+
+
+/* Combine a generic pgprot_t with cache home to get a cache-aware pgprot. */
+static pgprot_t __init construct_pgprot(pgprot_t prot, int home)
+{
+	prot = pte_set_home(prot, home);
+#if CHIP_HAS_CBOX_HOME_MAP()
+	if (home == PAGE_HOME_IMMUTABLE) {
+		if (ktext_hash)
+			prot = hv_pte_set_mode(prot, HV_PTE_MODE_CACHE_HASH_L3);
+		else
+			prot = hv_pte_set_mode(prot, HV_PTE_MODE_CACHE_NO_L3);
+	}
+#endif
+	return prot;
+}
+
+/*
+ * For a given kernel data VA, how should it be cached?
+ * We return the complete pgprot_t with caching bits set.
+ */
+static pgprot_t __init init_pgprot(ulong address)
+{
+	int cpu;
+	unsigned long page;
+	enum { CODE_DELTA = MEM_SV_INTRPT - PAGE_OFFSET };
+
+#if CHIP_HAS_CBOX_HOME_MAP()
+	/* For kdata=huge, everything is just hash-for-home. */
+	if (kdata_huge)
+		return construct_pgprot(PAGE_KERNEL, PAGE_HOME_HASH);
+#endif
+
+	/* We map the aliased pages of permanent text inaccessible. */
+	if (address < (ulong) _sinittext - CODE_DELTA)
+		return PAGE_NONE;
+
+	/*
+	 * We map read-only data non-coherent for performance.  We could
+	 * use neighborhood caching on TILE64, but it's not clear it's a win.
+	 */
+	if ((address >= (ulong) __start_rodata &&
+	     address < (ulong) __end_rodata) ||
+	    address == (ulong) empty_zero_page) {
+		return construct_pgprot(PAGE_KERNEL_RO, PAGE_HOME_IMMUTABLE);
+	}
+
+	/* As a performance optimization, keep the boot init stack here. */
+	if (address >= (ulong)&init_thread_union &&
+	    address < (ulong)&init_thread_union + THREAD_SIZE)
+		return construct_pgprot(PAGE_KERNEL, smp_processor_id());
+
+#ifndef __tilegx__
+#if !ATOMIC_LOCKS_FOUND_VIA_TABLE()
+	/* Force the atomic_locks[] array page to be hash-for-home. */
+	if (address == (ulong) atomic_locks)
+		return construct_pgprot(PAGE_KERNEL, PAGE_HOME_HASH);
+#endif
+#endif
+
+	/*
+	 * Everything else that isn't data or bss is heap, so mark it
+	 * with the initial heap home (hash-for-home, or this cpu).  This
+	 * includes any addresses after the loaded image; any address before
+	 * _einittext (since we already captured the case of text before
+	 * _sinittext); and any init-data pages.
+	 *
+	 * All the LOWMEM pages that we mark this way will get their
+	 * struct page homecache properly marked later, in set_page_homes().
+	 * The HIGHMEM pages we leave with a default zero for their
+	 * homes, but with a zero free_time we don't have to actually
+	 * do a flush action the first time we use them, either.
+	 */
+	if (address >= (ulong) _end || address < (ulong) _sdata ||
+	    (address >= (ulong) _sinitdata &&
+	     address < (ulong) _einitdata))
+		return construct_pgprot(PAGE_KERNEL, initial_heap_home());
+
+#if CHIP_HAS_CBOX_HOME_MAP()
+	/* Use hash-for-home if requested for data/bss. */
+	if (kdata_hash)
+		return construct_pgprot(PAGE_KERNEL, PAGE_HOME_HASH);
+#endif
+
+	/*
+	 * Otherwise we just hand out consecutive cpus.  To avoid
+	 * requiring this function to hold state, we just walk forward from
+	 * _sdata by PAGE_SIZE, skipping the readonly and init data, to reach
+	 * the requested address, while walking cpu home around kdata_mask.
+	 * This is typically no more than a dozen or so iterations.
+	 */
+	BUG_ON(_einitdata != __bss_start);
+	for (page = (ulong)_sdata, cpu = NR_CPUS; ; ) {
+		cpu = cpumask_next(cpu, &kdata_mask);
+		if (cpu == NR_CPUS)
+			cpu = cpumask_first(&kdata_mask);
+		if (page >= address)
+			break;
+		page += PAGE_SIZE;
+		if (page == (ulong)__start_rodata)
+			page = (ulong)__end_rodata;
+		if (page == (ulong)&init_thread_union)
+			page += THREAD_SIZE;
+		if (page == (ulong)_sinitdata)
+			page = (ulong)_einitdata;
+		if (page == (ulong)empty_zero_page)
+			page += PAGE_SIZE;
+#ifndef __tilegx__
+#if !ATOMIC_LOCKS_FOUND_VIA_TABLE()
+		if (page == (ulong)atomic_locks)
+			page += PAGE_SIZE;
+#endif
+#endif
+
+	}
+	return construct_pgprot(PAGE_KERNEL, cpu);
+}
+
+/*
+ * This function sets up how we cache the kernel text.  If we have
+ * hash-for-home support, normally that is used instead (see the
+ * kcache_hash boot flag for more information).  But if we end up
+ * using a page-based caching technique, this option sets up the
+ * details of that.  In addition, the "ktext=nocache" option may
+ * always be used to disable local caching of text pages, if desired.
+ */
+
+static int __initdata ktext_arg_seen;
+static int __initdata ktext_small;
+static int __initdata ktext_local;
+static int __initdata ktext_all;
+static int __initdata ktext_nondataplane;
+static int __initdata ktext_nocache;
+static struct cpumask __initdata ktext_mask;
+
+static int __init setup_ktext(char *str)
+{
+	if (str == NULL)
+		return -EINVAL;
+
+	/* If you have a leading "nocache", turn off ktext caching */
+	if (strncmp(str, "nocache", 7) == 0) {
+		ktext_nocache = 1;
+		printk("ktext: disabling local caching of kernel text\n");
+		str += 7;
+		if (*str == ',')
+			++str;
+		if (*str == '\0')
+			return 0;
+	}
+
+	ktext_arg_seen = 1;
+
+	/* Default setting on Tile64: use a huge page */
+	if (strcmp(str, "huge") == 0)
+		printk("ktext: using one huge locally cached page\n");
+
+	/* Pay TLB cost but get no cache benefit: cache small pages locally */
+	else if (strcmp(str, "local") == 0) {
+		ktext_small = 1;
+		ktext_local = 1;
+		printk("ktext: using small pages with local caching\n");
+	}
+
+	/* Neighborhood cache ktext pages on all cpus. */
+	else if (strcmp(str, "all") == 0) {
+		ktext_small = 1;
+		ktext_all = 1;
+		printk("ktext: using maximal caching neighborhood\n");
+	}
+
+
+	/* Neighborhood ktext pages on specified mask */
+	else if (cpulist_parse(str, &ktext_mask) == 0) {
+		char buf[NR_CPUS * 5];
+		cpulist_scnprintf(buf, sizeof(buf), &ktext_mask);
+		if (cpumask_weight(&ktext_mask) > 1) {
+			ktext_small = 1;
+			printk("ktext: using caching neighborhood %s "
+			       "with small pages\n", buf);
+		} else {
+			printk("ktext: caching on cpu %s with one huge page\n",
+			       buf);
+		}
+	}
+
+	else if (*str)
+		return -EINVAL;
+
+	return 0;
+}
+
+early_param("ktext", setup_ktext);
+
+
+static inline pgprot_t ktext_set_nocache(pgprot_t prot)
+{
+	if (!ktext_nocache)
+		prot = hv_pte_set_nc(prot);
+#if CHIP_HAS_NC_AND_NOALLOC_BITS()
+	else
+		prot = hv_pte_set_no_alloc_l2(prot);
+#endif
+	return prot;
+}
+
+#ifndef __tilegx__
+static pmd_t *__init get_pmd(pgd_t pgtables[], unsigned long va)
+{
+	return pmd_offset(pud_offset(&pgtables[pgd_index(va)], va), va);
+}
+#else
+static pmd_t *__init get_pmd(pgd_t pgtables[], unsigned long va)
+{
+	pud_t *pud = pud_offset(&pgtables[pgd_index(va)], va);
+	if (pud_none(*pud))
+		assign_pmd(pud, alloc_pmd());
+	return pmd_offset(pud, va);
+}
+#endif
+
+/* Temporary page table we use for staging. */
+static pgd_t pgtables[PTRS_PER_PGD]
+ __attribute__((section(".init.page")));
+
+/*
+ * This maps the physical memory to kernel virtual address space, a total
+ * of max_low_pfn pages, by creating page tables starting from address
+ * PAGE_OFFSET.
+ *
+ * This routine transitions us from using a set of compiled-in large
+ * pages to using some more precise caching, including removing access
+ * to code pages mapped at PAGE_OFFSET (executed only at MEM_SV_START)
+ * marking read-only data as locally cacheable, striping the remaining
+ * .data and .bss across all the available tiles, and removing access
+ * to pages above the top of RAM (thus ensuring a page fault from a bad
+ * virtual address rather than a hypervisor shoot down for accessing
+ * memory outside the assigned limits).
+ */
+static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
+{
+	unsigned long address, pfn;
+	pmd_t *pmd;
+	pte_t *pte;
+	int pte_ofs;
+	const struct cpumask *my_cpu_mask = cpumask_of(smp_processor_id());
+	struct cpumask kstripe_mask;
+	int rc, i;
+
+#if CHIP_HAS_CBOX_HOME_MAP()
+	if (ktext_arg_seen && ktext_hash) {
+		printk("warning: \"ktext\" boot argument ignored"
+		       " if \"kcache_hash\" sets up text hash-for-home\n");
+		ktext_small = 0;
+	}
+
+	if (kdata_arg_seen && kdata_hash) {
+		printk("warning: \"kdata\" boot argument ignored"
+		       " if \"kcache_hash\" sets up data hash-for-home\n");
+	}
+
+	if (kdata_huge && !hash_default) {
+		printk("warning: disabling \"kdata=huge\"; requires"
+		       " kcache_hash=all or =allbutstack\n");
+		kdata_huge = 0;
+	}
+#endif
+
+	/*
+	 * Set up a mask for cpus to use for kernel striping.
+	 * This is normally all cpus, but minus dataplane cpus if any.
+	 * If the dataplane covers the whole chip, we stripe over
+	 * the whole chip too.
+	 */
+	cpumask_copy(&kstripe_mask, cpu_possible_mask);
+	if (!kdata_arg_seen)
+		kdata_mask = kstripe_mask;
+
+	/* Allocate and fill in L2 page tables */
+	for (i = 0; i < MAX_NUMNODES; ++i) {
+#ifdef CONFIG_HIGHMEM
+		unsigned long end_pfn = node_lowmem_end_pfn[i];
+#else
+		unsigned long end_pfn = node_end_pfn[i];
+#endif
+		unsigned long end_huge_pfn = 0;
+
+		/* Pre-shatter the last huge page to allow per-cpu pages. */
+		if (kdata_huge)
+			end_huge_pfn = end_pfn - (HPAGE_SIZE >> PAGE_SHIFT);
+
+		pfn = node_start_pfn[i];
+
+		/* Allocate enough memory to hold L2 page tables for node. */
+		init_prealloc_ptes(i, end_pfn - pfn);
+
+		address = (unsigned long) pfn_to_kaddr(pfn);
+		while (pfn < end_pfn) {
+			BUG_ON(address & (HPAGE_SIZE-1));
+			pmd = get_pmd(pgtables, address);
+			pte = get_prealloc_pte(pfn);
+			if (pfn < end_huge_pfn) {
+				pgprot_t prot = init_pgprot(address);
+				*(pte_t *)pmd = pte_mkhuge(pfn_pte(pfn, prot));
+				for (pte_ofs = 0; pte_ofs < PTRS_PER_PTE;
+				     pfn++, pte_ofs++, address += PAGE_SIZE)
+					pte[pte_ofs] = pfn_pte(pfn, prot);
+			} else {
+				if (kdata_huge)
+					printk(KERN_DEBUG "pre-shattered huge"
+					       " page at %#lx\n", address);
+				for (pte_ofs = 0; pte_ofs < PTRS_PER_PTE;
+				     pfn++, pte_ofs++, address += PAGE_SIZE) {
+					pgprot_t prot = init_pgprot(address);
+					pte[pte_ofs] = pfn_pte(pfn, prot);
+				}
+				assign_pte(pmd, pte);
+			}
+		}
+	}
+
+	/*
+	 * Set or check ktext_map now that we have cpu_possible_mask
+	 * and kstripe_mask to work with.
+	 */
+	if (ktext_all)
+		cpumask_copy(&ktext_mask, cpu_possible_mask);
+	else if (ktext_nondataplane)
+		ktext_mask = kstripe_mask;
+	else if (!cpumask_empty(&ktext_mask)) {
+		/* Sanity-check any mask that was requested */
+		struct cpumask bad;
+		cpumask_andnot(&bad, &ktext_mask, cpu_possible_mask);
+		cpumask_and(&ktext_mask, &ktext_mask, cpu_possible_mask);
+		if (!cpumask_empty(&bad)) {
+			char buf[NR_CPUS * 5];
+			cpulist_scnprintf(buf, sizeof(buf), &bad);
+			printk("ktext: not using unavailable cpus %s\n", buf);
+		}
+		if (cpumask_empty(&ktext_mask)) {
+			printk("ktext: no valid cpus; caching on %d.\n",
+			       smp_processor_id());
+			cpumask_copy(&ktext_mask,
+				     cpumask_of(smp_processor_id()));
+		}
+	}
+
+	address = MEM_SV_INTRPT;
+	pmd = get_pmd(pgtables, address);
+	if (ktext_small) {
+		/* Allocate an L2 PTE for the kernel text */
+		int cpu = 0;
+		pgprot_t prot = construct_pgprot(PAGE_KERNEL_EXEC,
+						 PAGE_HOME_IMMUTABLE);
+
+		if (ktext_local) {
+			if (ktext_nocache)
+				prot = hv_pte_set_mode(prot,
+						       HV_PTE_MODE_UNCACHED);
+			else
+				prot = hv_pte_set_mode(prot,
+						       HV_PTE_MODE_CACHE_NO_L3);
+		} else {
+			prot = hv_pte_set_mode(prot,
+					       HV_PTE_MODE_CACHE_TILE_L3);
+			cpu = cpumask_first(&ktext_mask);
+
+			prot = ktext_set_nocache(prot);
+		}
+
+		BUG_ON(address != (unsigned long)_stext);
+		pfn = 0;  /* code starts at PA 0 */
+		pte = alloc_pte();
+		for (pte_ofs = 0; address < (unsigned long)_einittext;
+		     pfn++, pte_ofs++, address += PAGE_SIZE) {
+			if (!ktext_local) {
+				prot = set_remote_cache_cpu(prot, cpu);
+				cpu = cpumask_next(cpu, &ktext_mask);
+				if (cpu == NR_CPUS)
+					cpu = cpumask_first(&ktext_mask);
+			}
+			pte[pte_ofs] = pfn_pte(pfn, prot);
+		}
+		assign_pte(pmd, pte);
+	} else {
+		pte_t pteval = pfn_pte(0, PAGE_KERNEL_EXEC);
+		pteval = pte_mkhuge(pteval);
+#if CHIP_HAS_CBOX_HOME_MAP()
+		if (ktext_hash) {
+			pteval = hv_pte_set_mode(pteval,
+						 HV_PTE_MODE_CACHE_HASH_L3);
+			pteval = ktext_set_nocache(pteval);
+		} else
+#endif /* CHIP_HAS_CBOX_HOME_MAP() */
+		if (cpumask_weight(&ktext_mask) == 1) {
+			pteval = set_remote_cache_cpu(pteval,
+					      cpumask_first(&ktext_mask));
+			pteval = hv_pte_set_mode(pteval,
+						 HV_PTE_MODE_CACHE_TILE_L3);
+			pteval = ktext_set_nocache(pteval);
+		} else if (ktext_nocache)
+			pteval = hv_pte_set_mode(pteval,
+						 HV_PTE_MODE_UNCACHED);
+		else
+			pteval = hv_pte_set_mode(pteval,
+						 HV_PTE_MODE_CACHE_NO_L3);
+		*(pte_t *)pmd = pteval;
+	}
+
+	/* Set swapper_pgprot here so it is flushed to memory right away. */
+	swapper_pgprot = init_pgprot((unsigned long)swapper_pg_dir);
+
+	/*
+	 * Since we may be changing the caching of the stack and page
+	 * table itself, we invoke an assembly helper to do the
+	 * following steps:
+	 *
+	 *  - flush the cache so we start with an empty slate
+	 *  - install pgtables[] as the real page table
+	 *  - flush the TLB so the new page table takes effect
+	 */
+	rc = flush_and_install_context(__pa(pgtables),
+				       init_pgprot((unsigned long)pgtables),
+				       __get_cpu_var(current_asid),
+				       cpumask_bits(my_cpu_mask));
+	BUG_ON(rc != 0);
+
+	/* Copy the page table back to the normal swapper_pg_dir. */
+	memcpy(pgd_base, pgtables, sizeof(pgtables));
+	__install_page_table(pgd_base, __get_cpu_var(current_asid),
+			     swapper_pgprot);
+}
+
+/*
+ * devmem_is_allowed() checks to see if /dev/mem access to a certain address
+ * is valid. The argument is a physical page number.
+ *
+ * On Tile, the only valid things for which we can just hand out unchecked
+ * PTEs are the kernel code and data.  Anything else might change its
+ * homing with time, and we wouldn't know to adjust the /dev/mem PTEs.
+ * Note that init_thread_union is released to heap soon after boot,
+ * so we include it in the init data.
+ *
+ * For TILE-Gx, we might want to consider allowing access to PA
+ * regions corresponding to PCI space, etc.
+ */
+int devmem_is_allowed(unsigned long pagenr)
+{
+	return pagenr < kaddr_to_pfn(_end) &&
+		!(pagenr >= kaddr_to_pfn(&init_thread_union) ||
+		  pagenr < kaddr_to_pfn(_einitdata)) &&
+		!(pagenr >= kaddr_to_pfn(_sinittext) ||
+		  pagenr <= kaddr_to_pfn(_einittext-1));
+}
+
+#ifdef CONFIG_HIGHMEM
+static void __init permanent_kmaps_init(pgd_t *pgd_base)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+	unsigned long vaddr;
+
+	vaddr = PKMAP_BASE;
+	page_table_range_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base);
+
+	pgd = swapper_pg_dir + pgd_index(vaddr);
+	pud = pud_offset(pgd, vaddr);
+	pmd = pmd_offset(pud, vaddr);
+	pte = pte_offset_kernel(pmd, vaddr);
+	pkmap_page_table = pte;
+}
+#endif /* CONFIG_HIGHMEM */
+
+
+static void __init init_free_pfn_range(unsigned long start, unsigned long end)
+{
+	unsigned long pfn;
+	struct page *page = pfn_to_page(start);
+
+	for (pfn = start; pfn < end; ) {
+		/* Optimize by freeing pages in large batches */
+		int order = __ffs(pfn);
+		int count, i;
+		struct page *p;
+
+		if (order >= MAX_ORDER)
+			order = MAX_ORDER-1;
+		count = 1 << order;
+		while (pfn + count > end) {
+			count >>= 1;
+			--order;
+		}
+		for (p = page, i = 0; i < count; ++i, ++p) {
+			__ClearPageReserved(p);
+			/*
+			 * Hacky direct set to avoid unnecessary
+			 * lock take/release for EVERY page here.
+			 */
+			p->_count.counter = 0;
+			p->_mapcount.counter = -1;
+		}
+		init_page_count(page);
+		__free_pages(page, order);
+		totalram_pages += count;
+
+		page += count;
+		pfn += count;
+	}
+}
+
+static void __init set_non_bootmem_pages_init(void)
+{
+	struct zone *z;
+	for_each_zone(z) {
+		unsigned long start, end;
+		int nid = z->zone_pgdat->node_id;
+
+		start = z->zone_start_pfn;
+		if (start == 0)
+			continue;  /* bootmem */
+		end = start + z->spanned_pages;
+		if (zone_idx(z) == ZONE_NORMAL) {
+			BUG_ON(start != node_start_pfn[nid]);
+			start = node_free_pfn[nid];
+		}
+#ifdef CONFIG_HIGHMEM
+		if (zone_idx(z) == ZONE_HIGHMEM)
+			totalhigh_pages += z->spanned_pages;
+#endif
+		if (kdata_huge) {
+			unsigned long percpu_pfn = node_percpu_pfn[nid];
+			if (start < percpu_pfn && end > percpu_pfn)
+				end = percpu_pfn;
+		}
+#ifdef CONFIG_PCI
+		if (start <= pci_reserve_start_pfn &&
+		    end > pci_reserve_start_pfn) {
+			if (end > pci_reserve_end_pfn)
+				init_free_pfn_range(pci_reserve_end_pfn, end);
+			end = pci_reserve_start_pfn;
+		}
+#endif
+		init_free_pfn_range(start, end);
+	}
+}
+
+/*
+ * paging_init() sets up the page tables - note that all of lowmem is
+ * already mapped by head.S.
+ */
+void __init paging_init(void)
+{
+#ifdef CONFIG_HIGHMEM
+	unsigned long vaddr, end;
+#endif
+#ifdef __tilegx__
+	pud_t *pud;
+#endif
+	pgd_t *pgd_base = swapper_pg_dir;
+
+	kernel_physical_mapping_init(pgd_base);
+
+#ifdef CONFIG_HIGHMEM
+	/*
+	 * Fixed mappings, only the page table structure has to be
+	 * created - mappings will be set by set_fixmap():
+	 */
+	vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
+	end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK;
+	page_table_range_init(vaddr, end, pgd_base);
+	permanent_kmaps_init(pgd_base);
+#endif
+
+#ifdef __tilegx__
+	/*
+	 * Since GX allocates just one pmd_t array worth of vmalloc space,
+	 * we go ahead and allocate it statically here, then share it
+	 * globally.  As a result we don't have to worry about any task
+	 * changing init_mm once we get up and running, and there's no
+	 * need for e.g. vmalloc_sync_all().
+	 */
+	BUILD_BUG_ON(pgd_index(VMALLOC_START) != pgd_index(VMALLOC_END));
+	pud = pud_offset(pgd_base + pgd_index(VMALLOC_START), VMALLOC_START);
+	assign_pmd(pud, alloc_pmd());
+#endif
+}
+
+
+/*
+ * Walk the kernel page tables and derive the page_home() from
+ * the PTEs, so that set_pte() can properly validate the caching
+ * of all PTEs it sees.
+ */
+void __init set_page_homes(void)
+{
+}
+
+static void __init set_max_mapnr_init(void)
+{
+#ifdef CONFIG_FLATMEM
+	max_mapnr = max_low_pfn;
+#endif
+}
+
+void __init mem_init(void)
+{
+	int codesize, datasize, initsize;
+	int i;
+#ifndef __tilegx__
+	void *last;
+#endif
+
+#ifdef CONFIG_FLATMEM
+	if (!mem_map)
+		BUG();
+#endif
+
+#ifdef CONFIG_HIGHMEM
+	/* check that fixmap and pkmap do not overlap */
+	if (PKMAP_ADDR(LAST_PKMAP-1) >= FIXADDR_START) {
+		printk(KERN_ERR "fixmap and kmap areas overlap"
+		       " - this will crash\n");
+		printk(KERN_ERR "pkstart: %lxh pkend: %lxh fixstart %lxh\n",
+		       PKMAP_BASE, PKMAP_ADDR(LAST_PKMAP-1),
+		       FIXADDR_START);
+		BUG();
+	}
+#endif
+
+	set_max_mapnr_init();
+
+	/* this will put all bootmem onto the freelists */
+	totalram_pages += free_all_bootmem();
+
+	/* count all remaining LOWMEM and give all HIGHMEM to page allocator */
+	set_non_bootmem_pages_init();
+
+	codesize =  (unsigned long)&_etext - (unsigned long)&_text;
+	datasize =  (unsigned long)&_end - (unsigned long)&_sdata;
+	initsize =  (unsigned long)&_einittext - (unsigned long)&_sinittext;
+	initsize += (unsigned long)&_einitdata - (unsigned long)&_sinitdata;
+
+	printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, %dk data, %dk init, %ldk highmem)\n",
+		(unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
+		num_physpages << (PAGE_SHIFT-10),
+		codesize >> 10,
+		datasize >> 10,
+		initsize >> 10,
+		(unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))
+	       );
+
+	/*
+	 * In debug mode, dump some interesting memory mappings.
+	 */
+#ifdef CONFIG_HIGHMEM
+	printk(KERN_DEBUG "  KMAP    %#lx - %#lx\n",
+	       FIXADDR_START, FIXADDR_TOP + PAGE_SIZE - 1);
+	printk(KERN_DEBUG "  PKMAP   %#lx - %#lx\n",
+	       PKMAP_BASE, PKMAP_ADDR(LAST_PKMAP) - 1);
+#endif
+#ifdef CONFIG_HUGEVMAP
+	printk(KERN_DEBUG "  HUGEMAP %#lx - %#lx\n",
+	       HUGE_VMAP_BASE, HUGE_VMAP_END - 1);
+#endif
+	printk(KERN_DEBUG "  VMALLOC %#lx - %#lx\n",
+	       _VMALLOC_START, _VMALLOC_END - 1);
+#ifdef __tilegx__
+	for (i = MAX_NUMNODES-1; i >= 0; --i) {
+		struct pglist_data *node = &node_data[i];
+		if (node->node_present_pages) {
+			unsigned long start = (unsigned long)
+				pfn_to_kaddr(node->node_start_pfn);
+			unsigned long end = start +
+				(node->node_present_pages << PAGE_SHIFT);
+			printk(KERN_DEBUG "  MEM%d    %#lx - %#lx\n",
+			       i, start, end - 1);
+		}
+	}
+#else
+	last = high_memory;
+	for (i = MAX_NUMNODES-1; i >= 0; --i) {
+		if ((unsigned long)vbase_map[i] != -1UL) {
+			printk(KERN_DEBUG "  LOWMEM%d %#lx - %#lx\n",
+			       i, (unsigned long) (vbase_map[i]),
+			       (unsigned long) (last-1));
+			last = vbase_map[i];
+		}
+	}
+#endif
+
+#ifndef __tilegx__
+	/*
+	 * Convert from using one lock for all atomic operations to
+	 * one per cpu.
+	 */
+	__init_atomic_per_cpu();
+#endif
+}
+
+/*
+ * this is for the non-NUMA, single node SMP system case.
+ * Specifically, in the case of x86, we will always add
+ * memory to the highmem for now.
+ */
+#ifndef CONFIG_NEED_MULTIPLE_NODES
+int arch_add_memory(u64 start, u64 size)
+{
+	struct pglist_data *pgdata = &contig_page_data;
+	struct zone *zone = pgdata->node_zones + MAX_NR_ZONES-1;
+	unsigned long start_pfn = start >> PAGE_SHIFT;
+	unsigned long nr_pages = size >> PAGE_SHIFT;
+
+	return __add_pages(zone, start_pfn, nr_pages);
+}
+
+int remove_memory(u64 start, u64 size)
+{
+	return -EINVAL;
+}
+#endif
+
+struct kmem_cache *pgd_cache;
+
+void __init pgtable_cache_init(void)
+{
+	pgd_cache = kmem_cache_create("pgd",
+				PTRS_PER_PGD*sizeof(pgd_t),
+				PTRS_PER_PGD*sizeof(pgd_t),
+				0,
+				NULL);
+	if (!pgd_cache)
+		panic("pgtable_cache_init(): Cannot create pgd cache");
+}
+
+#if !CHIP_HAS_COHERENT_LOCAL_CACHE()
+/*
+ * The __w1data area holds data that is only written during initialization,
+ * and is read-only and thus freely cacheable thereafter.  Fix the page
+ * table entries that cover that region accordingly.
+ */
+static void mark_w1data_ro(void)
+{
+	/* Loop over page table entries */
+	unsigned long addr = (unsigned long)__w1data_begin;
+	BUG_ON((addr & (PAGE_SIZE-1)) != 0);
+	for (; addr <= (unsigned long)__w1data_end - 1; addr += PAGE_SIZE) {
+		unsigned long pfn = kaddr_to_pfn((void *)addr);
+		struct page *page = pfn_to_page(pfn);
+		pte_t *ptep = virt_to_pte(NULL, addr);
+		BUG_ON(pte_huge(*ptep));   /* not relevant for kdata_huge */
+		set_pte_at(&init_mm, addr, ptep, pfn_pte(pfn, PAGE_KERNEL_RO));
+	}
+}
+#endif
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+static long __write_once initfree;
+#else
+static long __write_once initfree = 1;
+#endif
+
+/* Select whether to free (1) or mark unusable (0) the __init pages. */
+static int __init set_initfree(char *str)
+{
+	strict_strtol(str, 0, &initfree);
+	printk("initfree: %s free init pages\n", initfree ? "will" : "won't");
+	return 1;
+}
+__setup("initfree=", set_initfree);
+
+static void free_init_pages(char *what, unsigned long begin, unsigned long end)
+{
+	unsigned long addr = (unsigned long) begin;
+
+	if (kdata_huge && !initfree) {
+		printk("Warning: ignoring initfree=0:"
+		       " incompatible with kdata=huge\n");
+		initfree = 1;
+	}
+	end = (end + PAGE_SIZE - 1) & PAGE_MASK;
+	local_flush_tlb_pages(NULL, begin, PAGE_SIZE, end - begin);
+	for (addr = begin; addr < end; addr += PAGE_SIZE) {
+		/*
+		 * Note we just reset the home here directly in the
+		 * page table.  We know this is safe because our caller
+		 * just flushed the caches on all the other cpus,
+		 * and they won't be touching any of these pages.
+		 */
+		int pfn = kaddr_to_pfn((void *)addr);
+		struct page *page = pfn_to_page(pfn);
+		pte_t *ptep = virt_to_pte(NULL, addr);
+		if (!initfree) {
+			/*
+			 * If debugging page accesses then do not free
+			 * this memory but mark them not present - any
+			 * buggy init-section access will create a
+			 * kernel page fault:
+			 */
+			pte_clear(&init_mm, addr, ptep);
+			continue;
+		}
+		__ClearPageReserved(page);
+		init_page_count(page);
+		if (pte_huge(*ptep))
+			BUG_ON(!kdata_huge);
+		else
+			set_pte_at(&init_mm, addr, ptep,
+				   pfn_pte(pfn, PAGE_KERNEL));
+		memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE);
+		free_page(addr);
+		totalram_pages++;
+	}
+	printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10);
+}
+
+void free_initmem(void)
+{
+	const unsigned long text_delta = MEM_SV_INTRPT - PAGE_OFFSET;
+
+	/*
+	 * Evict the dirty initdata on the boot cpu, evict the w1data
+	 * wherever it's homed, and evict all the init code everywhere.
+	 * We are guaranteed that no one will touch the init pages any
+	 * more, and although other cpus may be touching the w1data,
+	 * we only actually change the caching on tile64, which won't
+	 * be keeping local copies in the other tiles' caches anyway.
+	 */
+	homecache_evict(&cpu_cacheable_map);
+
+	/* Free the data pages that we won't use again after init. */
+	free_init_pages("unused kernel data",
+			(unsigned long)_sinitdata,
+			(unsigned long)_einitdata);
+
+	/*
+	 * Free the pages mapped from 0xc0000000 that correspond to code
+	 * pages from 0xfd000000 that we won't use again after init.
+	 */
+	free_init_pages("unused kernel text",
+			(unsigned long)_sinittext - text_delta,
+			(unsigned long)_einittext - text_delta);
+
+#if !CHIP_HAS_COHERENT_LOCAL_CACHE()
+	/*
+	 * Upgrade the .w1data section to globally cached.
+	 * We don't do this on tilepro, since the cache architecture
+	 * pretty much makes it irrelevant, and in any case we end
+	 * up having racing issues with other tiles that may touch
+	 * the data after we flush the cache but before we update
+	 * the PTEs and flush the TLBs, causing sharer shootdowns
+	 * later.  Even though this is to clean data, it seems like
+	 * an unnecessary complication.
+	 */
+	mark_w1data_ro();
+#endif
+
+	/* Do a global TLB flush so everyone sees the changes. */
+	flush_tlb_all();
+}
diff --git a/arch/tile/mm/migrate.h b/arch/tile/mm/migrate.h
new file mode 100644
index 0000000..cd45a08
--- /dev/null
+++ b/arch/tile/mm/migrate.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ * Structure definitions for migration, exposed here for use by
+ * arch/tile/kernel/asm-offsets.c.
+ */
+
+#ifndef MM_MIGRATE_H
+#define MM_MIGRATE_H
+
+#include <linux/cpumask.h>
+#include <hv/hypervisor.h>
+
+/*
+ * This function is used as a helper when setting up the initial
+ * page table (swapper_pg_dir).
+ */
+extern int flush_and_install_context(HV_PhysAddr page_table, HV_PTE access,
+				     HV_ASID asid,
+				     const unsigned long *cpumask);
+
+/*
+ * This function supports migration as a "helper" as follows:
+ *
+ *  - Set the stack PTE itself to "migrating".
+ *  - Do a global TLB flush for (va,length) and the specified ASIDs.
+ *  - Do a cache-evict on all necessary cpus.
+ *  - Write the new stack PTE.
+ *
+ * Note that any non-NULL pointers must not point to the page that
+ * is handled by the stack_pte itself.
+ */
+extern int homecache_migrate_stack_and_flush(pte_t stack_pte, unsigned long va,
+				     size_t length, pte_t *stack_ptep,
+				     const struct cpumask *cache_cpumask,
+				     const struct cpumask *tlb_cpumask,
+				     HV_Remote_ASID *asids,
+				     int asidcount);
+
+#endif /* MM_MIGRATE_H */
diff --git a/arch/tile/mm/migrate_32.S b/arch/tile/mm/migrate_32.S
new file mode 100644
index 0000000..f738765
--- /dev/null
+++ b/arch/tile/mm/migrate_32.S
@@ -0,0 +1,211 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ * This routine is a helper for migrating the home of a set of pages to
+ * a new cpu.  See the documentation in homecache.c for more information.
+ */
+
+#include <linux/linkage.h>
+#include <linux/threads.h>
+#include <asm/page.h>
+#include <asm/types.h>
+#include <asm/asm-offsets.h>
+#include <hv/hypervisor.h>
+
+	.text
+
+/*
+ * First, some definitions that apply to all the code in the file.
+ */
+
+/* Locals (caller-save) */
+#define r_tmp		r10
+#define r_save_sp	r11
+
+/* What we save where in the stack frame; must include all callee-saves. */
+#define FRAME_SP	4
+#define FRAME_R30	8
+#define FRAME_R31	12
+#define FRAME_R32	16
+#define FRAME_R33	20
+#define FRAME_R34	24
+#define FRAME_R35	28
+#define FRAME_SIZE	32
+
+
+
+
+/*
+ * On entry:
+ *
+ *   r0 low word of the new context PA to install (moved to r_context_lo)
+ *   r1 high word of the new context PA to install (moved to r_context_hi)
+ *   r2 low word of PTE to use for context access (moved to r_access_lo)
+ *   r3 high word of PTE to use for context access (moved to r_access_lo)
+ *   r4 ASID to use for new context (moved to r_asid)
+ *   r5 pointer to cpumask with just this cpu set in it (r_my_cpumask)
+ */
+
+/* Arguments (caller-save) */
+#define r_context_lo_in	r0
+#define r_context_hi_in	r1
+#define r_access_lo_in	r2
+#define r_access_hi_in	r3
+#define r_asid_in	r4
+#define r_my_cpumask	r5
+
+/* Locals (callee-save); must not be more than FRAME_xxx above. */
+#define r_save_ics	r30
+#define r_context_lo	r31
+#define r_context_hi	r32
+#define r_access_lo	r33
+#define r_access_hi	r34
+#define r_asid		r35
+
+STD_ENTRY(flush_and_install_context)
+	/*
+	 * Create a stack frame; we can't touch it once we flush the
+	 * cache until we install the new page table and flush the TLB.
+	 */
+	{
+	 move r_save_sp, sp
+	 sw sp, lr
+	 addi sp, sp, -FRAME_SIZE
+	}
+	addi r_tmp, sp, FRAME_SP
+	{
+	 sw r_tmp, r_save_sp
+	 addi r_tmp, sp, FRAME_R30
+	}
+	{
+	 sw r_tmp, r30
+	 addi r_tmp, sp, FRAME_R31
+	}
+	{
+	 sw r_tmp, r31
+	 addi r_tmp, sp, FRAME_R32
+	}
+	{
+	 sw r_tmp, r32
+	 addi r_tmp, sp, FRAME_R33
+	}
+	{
+	 sw r_tmp, r33
+	 addi r_tmp, sp, FRAME_R34
+	}
+	{
+	 sw r_tmp, r34
+	 addi r_tmp, sp, FRAME_R35
+	}
+	sw r_tmp, r35
+
+	/* Move some arguments to callee-save registers. */
+	{
+	 move r_context_lo, r_context_lo_in
+	 move r_context_hi, r_context_hi_in
+	}
+	{
+	 move r_access_lo, r_access_lo_in
+	 move r_access_hi, r_access_hi_in
+	}
+	move r_asid, r_asid_in
+
+	/* Disable interrupts, since we can't use our stack. */
+	{
+	 mfspr r_save_ics, INTERRUPT_CRITICAL_SECTION
+	 movei r_tmp, 1
+	}
+	mtspr INTERRUPT_CRITICAL_SECTION, r_tmp
+
+	/* First, flush our L2 cache. */
+	{
+	 move r0, zero  /* cache_pa */
+	 move r1, zero
+	}
+	{
+	 auli r2, zero, ha16(HV_FLUSH_EVICT_L2)  /* cache_control */
+	 move r3, r_my_cpumask  /* cache_cpumask */
+	}
+	{
+	 move r4, zero  /* tlb_va */
+	 move r5, zero  /* tlb_length */
+	}
+	{
+	 move r6, zero  /* tlb_pgsize */
+	 move r7, zero  /* tlb_cpumask */
+	}
+	{
+	 move r8, zero  /* asids */
+	 move r9, zero  /* asidcount */
+	}
+	jal hv_flush_remote
+	bnz r0, .Ldone
+
+	/* Now install the new page table. */
+	{
+	 move r0, r_context_lo
+	 move r1, r_context_hi
+	}
+	{
+	 move r2, r_access_lo
+	 move r3, r_access_hi
+	}
+	{
+	 move r4, r_asid
+	 movei r5, HV_CTX_DIRECTIO
+	}
+	jal hv_install_context
+	bnz r0, .Ldone
+
+	/* Finally, flush the TLB. */
+	{
+	 movei r0, 0   /* preserve_global */
+	 jal hv_flush_all
+	}
+
+.Ldone:
+	/* Reset interrupts back how they were before. */
+	mtspr INTERRUPT_CRITICAL_SECTION, r_save_ics
+
+	/* Restore the callee-saved registers and return. */
+	addli lr, sp, FRAME_SIZE
+	{
+	 lw lr, lr
+	 addli r_tmp, sp, FRAME_R30
+	}
+	{
+	 lw r30, r_tmp
+	 addli r_tmp, sp, FRAME_R31
+	}
+	{
+	 lw r31, r_tmp
+	 addli r_tmp, sp, FRAME_R32
+	}
+	{
+	 lw r32, r_tmp
+	 addli r_tmp, sp, FRAME_R33
+	}
+	{
+	 lw r33, r_tmp
+	 addli r_tmp, sp, FRAME_R34
+	}
+	{
+	 lw r34, r_tmp
+	 addli r_tmp, sp, FRAME_R35
+	}
+	{
+	 lw r35, r_tmp
+	 addi sp, sp, FRAME_SIZE
+	}
+	jrp lr
+	STD_ENDPROC(flush_and_install_context)
diff --git a/arch/tile/mm/mmap.c b/arch/tile/mm/mmap.c
new file mode 100644
index 0000000..f96f4ce
--- /dev/null
+++ b/arch/tile/mm/mmap.c
@@ -0,0 +1,75 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ * Taken from the i386 architecture and simplified.
+ */
+
+#include <linux/mm.h>
+#include <linux/random.h>
+#include <linux/limits.h>
+#include <linux/sched.h>
+#include <linux/mman.h>
+#include <linux/compat.h>
+
+/*
+ * Top of mmap area (just below the process stack).
+ *
+ * Leave an at least ~128 MB hole.
+ */
+#define MIN_GAP (128*1024*1024)
+#define MAX_GAP (TASK_SIZE/6*5)
+
+static inline unsigned long mmap_base(struct mm_struct *mm)
+{
+	unsigned long gap = rlimit(RLIMIT_STACK);
+	unsigned long random_factor = 0;
+
+	if (current->flags & PF_RANDOMIZE)
+		random_factor = get_random_int() % (1024*1024);
+
+	if (gap < MIN_GAP)
+		gap = MIN_GAP;
+	else if (gap > MAX_GAP)
+		gap = MAX_GAP;
+
+	return PAGE_ALIGN(TASK_SIZE - gap - random_factor);
+}
+
+/*
+ * This function, called very early during the creation of a new
+ * process VM image, sets up which VM layout function to use:
+ */
+void arch_pick_mmap_layout(struct mm_struct *mm)
+{
+#if !defined(__tilegx__)
+	int is_32bit = 1;
+#elif defined(CONFIG_COMPAT)
+	int is_32bit = is_compat_task();
+#else
+	int is_32bit = 0;
+#endif
+
+	/*
+	 * Use standard layout if the expected stack growth is unlimited
+	 * or we are running native 64 bits.
+	 */
+	if (!is_32bit || rlimit(RLIMIT_STACK) == RLIM_INFINITY) {
+		mm->mmap_base = TASK_UNMAPPED_BASE;
+		mm->get_unmapped_area = arch_get_unmapped_area;
+		mm->unmap_area = arch_unmap_area;
+	} else {
+		mm->mmap_base = mmap_base(mm);
+		mm->get_unmapped_area = arch_get_unmapped_area_topdown;
+		mm->unmap_area = arch_unmap_area_topdown;
+	}
+}
diff --git a/arch/tile/mm/pgtable.c b/arch/tile/mm/pgtable.c
new file mode 100644
index 0000000..289e729
--- /dev/null
+++ b/arch/tile/mm/pgtable.c
@@ -0,0 +1,566 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/smp.h>
+#include <linux/highmem.h>
+#include <linux/slab.h>
+#include <linux/pagemap.h>
+#include <linux/spinlock.h>
+#include <linux/cpumask.h>
+#include <linux/module.h>
+#include <linux/io.h>
+#include <linux/vmalloc.h>
+#include <linux/smp.h>
+
+#include <asm/system.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/fixmap.h>
+#include <asm/tlb.h>
+#include <asm/tlbflush.h>
+#include <asm/homecache.h>
+
+#define K(x) ((x) << (PAGE_SHIFT-10))
+
+/*
+ * The normal show_free_areas() is too verbose on Tile, with dozens
+ * of processors and often four NUMA zones each with high and lowmem.
+ */
+void show_mem(void)
+{
+	struct zone *zone;
+
+	printk("Active:%lu inactive:%lu dirty:%lu writeback:%lu unstable:%lu"
+	       " free:%lu\n slab:%lu mapped:%lu pagetables:%lu bounce:%lu"
+	       " pagecache:%lu swap:%lu\n",
+	       (global_page_state(NR_ACTIVE_ANON) +
+		global_page_state(NR_ACTIVE_FILE)),
+	       (global_page_state(NR_INACTIVE_ANON) +
+		global_page_state(NR_INACTIVE_FILE)),
+	       global_page_state(NR_FILE_DIRTY),
+	       global_page_state(NR_WRITEBACK),
+	       global_page_state(NR_UNSTABLE_NFS),
+	       global_page_state(NR_FREE_PAGES),
+	       (global_page_state(NR_SLAB_RECLAIMABLE) +
+		global_page_state(NR_SLAB_UNRECLAIMABLE)),
+	       global_page_state(NR_FILE_MAPPED),
+	       global_page_state(NR_PAGETABLE),
+	       global_page_state(NR_BOUNCE),
+	       global_page_state(NR_FILE_PAGES),
+	       nr_swap_pages);
+
+	for_each_zone(zone) {
+		unsigned long flags, order, total = 0, largest_order = -1;
+
+		if (!populated_zone(zone))
+			continue;
+
+		printk("Node %d %7s: ", zone_to_nid(zone), zone->name);
+		spin_lock_irqsave(&zone->lock, flags);
+		for (order = 0; order < MAX_ORDER; order++) {
+			int nr = zone->free_area[order].nr_free;
+			total += nr << order;
+			if (nr)
+				largest_order = order;
+		}
+		spin_unlock_irqrestore(&zone->lock, flags);
+		printk("%lukB (largest %luKb)\n",
+		       K(total), largest_order ? K(1UL) << largest_order : 0);
+	}
+}
+
+/*
+ * Associate a virtual page frame with a given physical page frame
+ * and protection flags for that frame.
+ */
+static void set_pte_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	pgd = swapper_pg_dir + pgd_index(vaddr);
+	if (pgd_none(*pgd)) {
+		BUG();
+		return;
+	}
+	pud = pud_offset(pgd, vaddr);
+	if (pud_none(*pud)) {
+		BUG();
+		return;
+	}
+	pmd = pmd_offset(pud, vaddr);
+	if (pmd_none(*pmd)) {
+		BUG();
+		return;
+	}
+	pte = pte_offset_kernel(pmd, vaddr);
+	/* <pfn,flags> stored as-is, to permit clearing entries */
+	set_pte(pte, pfn_pte(pfn, flags));
+
+	/*
+	 * It's enough to flush this one mapping.
+	 * This appears conservative since it is only called
+	 * from __set_fixmap.
+	 */
+	local_flush_tlb_page(NULL, vaddr, PAGE_SIZE);
+}
+
+/*
+ * Associate a huge virtual page frame with a given physical page frame
+ * and protection flags for that frame. pfn is for the base of the page,
+ * vaddr is what the page gets mapped to - both must be properly aligned.
+ * The pmd must already be instantiated.
+ */
+void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+
+	if (vaddr & (PMD_SIZE-1)) {		/* vaddr is misaligned */
+		printk(KERN_WARNING "set_pmd_pfn: vaddr misaligned\n");
+		return; /* BUG(); */
+	}
+	if (pfn & (PTRS_PER_PTE-1)) {		/* pfn is misaligned */
+		printk(KERN_WARNING "set_pmd_pfn: pfn misaligned\n");
+		return; /* BUG(); */
+	}
+	pgd = swapper_pg_dir + pgd_index(vaddr);
+	if (pgd_none(*pgd)) {
+		printk(KERN_WARNING "set_pmd_pfn: pgd_none\n");
+		return; /* BUG(); */
+	}
+	pud = pud_offset(pgd, vaddr);
+	pmd = pmd_offset(pud, vaddr);
+	set_pmd(pmd, ptfn_pmd(HV_PFN_TO_PTFN(pfn), flags));
+	/*
+	 * It's enough to flush this one mapping.
+	 * We flush both small and huge TSBs to be sure.
+	 */
+	local_flush_tlb_page(NULL, vaddr, HPAGE_SIZE);
+	local_flush_tlb_pages(NULL, vaddr, PAGE_SIZE, HPAGE_SIZE);
+}
+
+void __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
+{
+	unsigned long address = __fix_to_virt(idx);
+
+	if (idx >= __end_of_fixed_addresses) {
+		BUG();
+		return;
+	}
+	set_pte_pfn(address, phys >> PAGE_SHIFT, flags);
+}
+
+#if defined(CONFIG_HIGHPTE)
+pte_t *_pte_offset_map(pmd_t *dir, unsigned long address, enum km_type type)
+{
+	pte_t *pte = kmap_atomic(pmd_page(*dir), type) +
+		(pmd_ptfn(*dir) << HV_LOG2_PAGE_TABLE_ALIGN) & ~PAGE_MASK;
+	return &pte[pte_index(address)];
+}
+#endif
+
+/*
+ * List of all pgd's needed so it can invalidate entries in both cached
+ * and uncached pgd's. This is essentially codepath-based locking
+ * against pageattr.c; it is the unique case in which a valid change
+ * of kernel pagetables can't be lazily synchronized by vmalloc faults.
+ * vmalloc faults work because attached pagetables are never freed.
+ * The locking scheme was chosen on the basis of manfred's
+ * recommendations and having no core impact whatsoever.
+ * -- wli
+ */
+DEFINE_SPINLOCK(pgd_lock);
+LIST_HEAD(pgd_list);
+
+static inline void pgd_list_add(pgd_t *pgd)
+{
+	list_add(pgd_to_list(pgd), &pgd_list);
+}
+
+static inline void pgd_list_del(pgd_t *pgd)
+{
+	list_del(pgd_to_list(pgd));
+}
+
+#define KERNEL_PGD_INDEX_START pgd_index(PAGE_OFFSET)
+#define KERNEL_PGD_PTRS (PTRS_PER_PGD - KERNEL_PGD_INDEX_START)
+
+static void pgd_ctor(pgd_t *pgd)
+{
+	unsigned long flags;
+
+	memset(pgd, 0, KERNEL_PGD_INDEX_START*sizeof(pgd_t));
+	spin_lock_irqsave(&pgd_lock, flags);
+
+#ifndef __tilegx__
+	/*
+	 * Check that the user interrupt vector has no L2.
+	 * It never should for the swapper, and new page tables
+	 * should always start with an empty user interrupt vector.
+	 */
+	BUG_ON(((u64 *)swapper_pg_dir)[pgd_index(MEM_USER_INTRPT)] != 0);
+#endif
+
+	clone_pgd_range(pgd + KERNEL_PGD_INDEX_START,
+			swapper_pg_dir + KERNEL_PGD_INDEX_START,
+			KERNEL_PGD_PTRS);
+
+	pgd_list_add(pgd);
+	spin_unlock_irqrestore(&pgd_lock, flags);
+}
+
+static void pgd_dtor(pgd_t *pgd)
+{
+	unsigned long flags; /* can be called from interrupt context */
+
+	spin_lock_irqsave(&pgd_lock, flags);
+	pgd_list_del(pgd);
+	spin_unlock_irqrestore(&pgd_lock, flags);
+}
+
+pgd_t *pgd_alloc(struct mm_struct *mm)
+{
+	pgd_t *pgd = kmem_cache_alloc(pgd_cache, GFP_KERNEL);
+	if (pgd)
+		pgd_ctor(pgd);
+	return pgd;
+}
+
+void pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+	pgd_dtor(pgd);
+	kmem_cache_free(pgd_cache, pgd);
+}
+
+
+#define L2_USER_PGTABLE_PAGES (1 << L2_USER_PGTABLE_ORDER)
+
+struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
+{
+	int flags = GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO|__GFP_COMP;
+	struct page *p;
+
+#ifdef CONFIG_HIGHPTE
+	flags |= __GFP_HIGHMEM;
+#endif
+
+	p = alloc_pages(flags, L2_USER_PGTABLE_ORDER);
+	if (p == NULL)
+		return NULL;
+
+	pgtable_page_ctor(p);
+	return p;
+}
+
+/*
+ * Free page immediately (used in __pte_alloc if we raced with another
+ * process).  We have to correct whatever pte_alloc_one() did before
+ * returning the pages to the allocator.
+ */
+void pte_free(struct mm_struct *mm, struct page *p)
+{
+	pgtable_page_dtor(p);
+	__free_pages(p, L2_USER_PGTABLE_ORDER);
+}
+
+void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte,
+		    unsigned long address)
+{
+	int i;
+
+	pgtable_page_dtor(pte);
+	tlb->need_flush = 1;
+	if (tlb_fast_mode(tlb)) {
+		struct page *pte_pages[L2_USER_PGTABLE_PAGES];
+		for (i = 0; i < L2_USER_PGTABLE_PAGES; ++i)
+			pte_pages[i] = pte + i;
+		free_pages_and_swap_cache(pte_pages, L2_USER_PGTABLE_PAGES);
+		return;
+	}
+	for (i = 0; i < L2_USER_PGTABLE_PAGES; ++i) {
+		tlb->pages[tlb->nr++] = pte + i;
+		if (tlb->nr >= FREE_PTE_NR)
+			tlb_flush_mmu(tlb, 0, 0);
+	}
+}
+
+#ifndef __tilegx__
+
+/*
+ * FIXME: needs to be atomic vs hypervisor writes.  For now we make the
+ * window of vulnerability a bit smaller by doing an unlocked 8-bit update.
+ */
+int ptep_test_and_clear_young(struct vm_area_struct *vma,
+			      unsigned long addr, pte_t *ptep)
+{
+#if HV_PTE_INDEX_ACCESSED < 8 || HV_PTE_INDEX_ACCESSED >= 16
+# error Code assumes HV_PTE "accessed" bit in second byte
+#endif
+	u8 *tmp = (u8 *)ptep;
+	u8 second_byte = tmp[1];
+	if (!(second_byte & (1 << (HV_PTE_INDEX_ACCESSED - 8))))
+		return 0;
+	tmp[1] = second_byte & ~(1 << (HV_PTE_INDEX_ACCESSED - 8));
+	return 1;
+}
+
+/*
+ * This implementation is atomic vs hypervisor writes, since the hypervisor
+ * always writes the low word (where "accessed" and "dirty" are) and this
+ * routine only writes the high word.
+ */
+void ptep_set_wrprotect(struct mm_struct *mm,
+			unsigned long addr, pte_t *ptep)
+{
+#if HV_PTE_INDEX_WRITABLE < 32
+# error Code assumes HV_PTE "writable" bit in high word
+#endif
+	u32 *tmp = (u32 *)ptep;
+	tmp[1] = tmp[1] & ~(1 << (HV_PTE_INDEX_WRITABLE - 32));
+}
+
+#endif
+
+pte_t *virt_to_pte(struct mm_struct* mm, unsigned long addr)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+
+	if (pgd_addr_invalid(addr))
+		return NULL;
+
+	pgd = mm ? pgd_offset(mm, addr) : swapper_pg_dir + pgd_index(addr);
+	pud = pud_offset(pgd, addr);
+	if (!pud_present(*pud))
+		return NULL;
+	pmd = pmd_offset(pud, addr);
+	if (pmd_huge_page(*pmd))
+		return (pte_t *)pmd;
+	if (!pmd_present(*pmd))
+		return NULL;
+	return pte_offset_kernel(pmd, addr);
+}
+
+pgprot_t set_remote_cache_cpu(pgprot_t prot, int cpu)
+{
+	unsigned int width = smp_width;
+	int x = cpu % width;
+	int y = cpu / width;
+	BUG_ON(y >= smp_height);
+	BUG_ON(hv_pte_get_mode(prot) != HV_PTE_MODE_CACHE_TILE_L3);
+	BUG_ON(cpu < 0 || cpu >= NR_CPUS);
+	BUG_ON(!cpu_is_valid_lotar(cpu));
+	return hv_pte_set_lotar(prot, HV_XY_TO_LOTAR(x, y));
+}
+
+int get_remote_cache_cpu(pgprot_t prot)
+{
+	HV_LOTAR lotar = hv_pte_get_lotar(prot);
+	int x = HV_LOTAR_X(lotar);
+	int y = HV_LOTAR_Y(lotar);
+	BUG_ON(hv_pte_get_mode(prot) != HV_PTE_MODE_CACHE_TILE_L3);
+	return x + y * smp_width;
+}
+
+void set_pte_order(pte_t *ptep, pte_t pte, int order)
+{
+	unsigned long pfn = pte_pfn(pte);
+	struct page *page = pfn_to_page(pfn);
+
+	/* Update the home of a PTE if necessary */
+	pte = pte_set_home(pte, page_home(page));
+
+#ifdef __tilegx__
+	*ptep = pte;
+#else
+	/*
+	 * When setting a PTE, write the high bits first, then write
+	 * the low bits.  This sets the "present" bit only after the
+	 * other bits are in place.  If a particular PTE update
+	 * involves transitioning from one valid PTE to another, it
+	 * may be necessary to call set_pte_order() more than once,
+	 * transitioning via a suitable intermediate state.
+	 * Note that this sequence also means that if we are transitioning
+	 * from any migrating PTE to a non-migrating one, we will not
+	 * see a half-updated PTE with the migrating bit off.
+	 */
+#if HV_PTE_INDEX_PRESENT >= 32 || HV_PTE_INDEX_MIGRATING >= 32
+# error Must write the present and migrating bits last
+#endif
+	((u32 *)ptep)[1] = (u32)(pte_val(pte) >> 32);
+	barrier();
+	((u32 *)ptep)[0] = (u32)(pte_val(pte));
+#endif
+}
+
+/* Can this mm load a PTE with cached_priority set? */
+static inline int mm_is_priority_cached(struct mm_struct *mm)
+{
+	return mm->context.priority_cached;
+}
+
+/*
+ * Add a priority mapping to an mm_context and
+ * notify the hypervisor if this is the first one.
+ */
+void start_mm_caching(struct mm_struct *mm)
+{
+	if (!mm_is_priority_cached(mm)) {
+		mm->context.priority_cached = -1U;
+		hv_set_caching(-1U);
+	}
+}
+
+/*
+ * Validate and return the priority_cached flag.  We know if it's zero
+ * that we don't need to scan, since we immediately set it non-zero
+ * when we first consider a MAP_CACHE_PRIORITY mapping.
+ *
+ * We only _try_ to acquire the mmap_sem semaphore; if we can't acquire it,
+ * since we're in an interrupt context (servicing switch_mm) we don't
+ * worry about it and don't unset the "priority_cached" field.
+ * Presumably we'll come back later and have more luck and clear
+ * the value then; for now we'll just keep the cache marked for priority.
+ */
+static unsigned int update_priority_cached(struct mm_struct *mm)
+{
+	if (mm->context.priority_cached && down_write_trylock(&mm->mmap_sem)) {
+		struct vm_area_struct *vm;
+		for (vm = mm->mmap; vm; vm = vm->vm_next) {
+			if (hv_pte_get_cached_priority(vm->vm_page_prot))
+				break;
+		}
+		if (vm == NULL)
+			mm->context.priority_cached = 0;
+		up_write(&mm->mmap_sem);
+	}
+	return mm->context.priority_cached;
+}
+
+/* Set caching correctly for an mm that we are switching to. */
+void check_mm_caching(struct mm_struct *prev, struct mm_struct *next)
+{
+	if (!mm_is_priority_cached(next)) {
+		/*
+		 * If the new mm doesn't use priority caching, just see if we
+		 * need the hv_set_caching(), or can assume it's already zero.
+		 */
+		if (mm_is_priority_cached(prev))
+			hv_set_caching(0);
+	} else {
+		hv_set_caching(update_priority_cached(next));
+	}
+}
+
+#if CHIP_HAS_MMIO()
+
+/* Map an arbitrary MMIO address, homed according to pgprot, into VA space. */
+void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size,
+			   pgprot_t home)
+{
+	void *addr;
+	struct vm_struct *area;
+	unsigned long offset, last_addr;
+	pgprot_t pgprot;
+
+	/* Don't allow wraparound or zero size */
+	last_addr = phys_addr + size - 1;
+	if (!size || last_addr < phys_addr)
+		return NULL;
+
+	/* Create a read/write, MMIO VA mapping homed at the requested shim. */
+	pgprot = PAGE_KERNEL;
+	pgprot = hv_pte_set_mode(pgprot, HV_PTE_MODE_MMIO);
+	pgprot = hv_pte_set_lotar(pgprot, hv_pte_get_lotar(home));
+
+	/*
+	 * Mappings have to be page-aligned
+	 */
+	offset = phys_addr & ~PAGE_MASK;
+	phys_addr &= PAGE_MASK;
+	size = PAGE_ALIGN(last_addr+1) - phys_addr;
+
+	/*
+	 * Ok, go for it..
+	 */
+	area = get_vm_area(size, VM_IOREMAP /* | other flags? */);
+	if (!area)
+		return NULL;
+	area->phys_addr = phys_addr;
+	addr = area->addr;
+	if (ioremap_page_range((unsigned long)addr, (unsigned long)addr + size,
+			       phys_addr, pgprot)) {
+		remove_vm_area((void *)(PAGE_MASK & (unsigned long) addr));
+		return NULL;
+	}
+	return (__force void __iomem *) (offset + (char *)addr);
+}
+EXPORT_SYMBOL(ioremap_prot);
+
+/* Map a PCI MMIO bus address into VA space. */
+void __iomem *ioremap(resource_size_t phys_addr, unsigned long size)
+{
+	panic("ioremap for PCI MMIO is not supported");
+}
+EXPORT_SYMBOL(ioremap);
+
+/* Unmap an MMIO VA mapping. */
+void iounmap(volatile void __iomem *addr_in)
+{
+	volatile void __iomem *addr = (volatile void __iomem *)
+		(PAGE_MASK & (unsigned long __force)addr_in);
+#if 1
+	vunmap((void * __force)addr);
+#else
+	/* x86 uses this complicated flow instead of vunmap().  Is
+	 * there any particular reason we should do the same? */
+	struct vm_struct *p, *o;
+
+	/* Use the vm area unlocked, assuming the caller
+	   ensures there isn't another iounmap for the same address
+	   in parallel. Reuse of the virtual address is prevented by
+	   leaving it in the global lists until we're done with it.
+	   cpa takes care of the direct mappings. */
+	read_lock(&vmlist_lock);
+	for (p = vmlist; p; p = p->next) {
+		if (p->addr == addr)
+			break;
+	}
+	read_unlock(&vmlist_lock);
+
+	if (!p) {
+		printk("iounmap: bad address %p\n", addr);
+		dump_stack();
+		return;
+	}
+
+	/* Finally remove it */
+	o = remove_vm_area((void *)addr);
+	BUG_ON(p != o || o == NULL);
+	kfree(p);
+#endif
+}
+EXPORT_SYMBOL(iounmap);
+
+#endif /* CHIP_HAS_MMIO() */
author	Chris Metcalf <cmetcalf@tilera.com>	2010-05-28 23:09:12 -0400
committer	Chris Metcalf <cmetcalf@tilera.com>	2010-06-04 17:11:18 -0400
commit	867e359b97c970a60626d5d76bbe2a8fadbf38fb (patch)
tree	c5ccbb7f5172e8555977119608ecb1eee3cc37e3 /arch/tile
parent	5360bd776f73d0a7da571d72a09a03f237e99900 (diff)
download	kernel_samsung_aries-867e359b97c970a60626d5d76bbe2a8fadbf38fb.zip kernel_samsung_aries-867e359b97c970a60626d5d76bbe2a8fadbf38fb.tar.gz kernel_samsung_aries-867e359b97c970a60626d5d76bbe2a8fadbf38fb.tar.bz2