/* ###
 * IP: GHIDRA
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Collection;

import ghidra.app.script.GhidraScript;
import ghidra.pcode.emu.PcodeMachine;
import ghidra.pcode.emu.linux.EmuLinuxAmd64SyscallUseropLibrary;
import ghidra.pcode.emu.linux.EmuLinuxX86SyscallUseropLibrary;
import ghidra.pcode.emu.sys.AnnotatedEmuSyscallUseropLibrary;
import ghidra.pcode.emu.sys.EmuSyscallLibrary;
import ghidra.pcode.exec.*;
import ghidra.pcode.exec.PcodeArithmetic.Purpose;
import ghidra.pcode.exec.PcodeExecutorStatePiece.Reason;
import ghidra.pcode.struct.StructuredSleigh;
import ghidra.pcode.utils.Utils;
import ghidra.program.model.address.AddressSpace;
import ghidra.program.model.data.DataTypeManager;
import ghidra.program.model.lang.Register;
import ghidra.program.model.listing.Program;

/**
 * A userop library that includes system call simulation
 * 
 * <p>
 * Such a library needs to implement {@link EmuSyscallLibrary}. Here we extend
 * {@link AnnotatedEmuSyscallUseropLibrary}, which allows us to implement it using annotated
 * methods. {@link EmuSyscallLibrary#syscall(PcodeExecutor, PcodeUseropLibrary)} is the system call
 * dispatcher, and it requires that each system call implement {@link EmuSyscallDefinition}. System
 * call libraries typically implement that interface by annotating p-code userops with
 * {@link EmuSyscall}. This allows system calls to be implemented via Java callback or Structured
 * Sleigh. Conventionally, the Java method names of system calls should be
 * <em>platform</em>_<em>name</em>. This is to prevent name conflicts among userops when several
 * libraries are composed.
 * 
 * <p>
 * Stock implementations for a limited set of Linux system calls are provided for x86 and amd64 in
 * {@link EmuLinuxX86SyscallUseropLibrary} and {@link EmuLinuxAmd64SyscallUseropLibrary},
 * respectively. The type hierarchy is designed to facilitate the implementation of related systems
 * without (too much) code duplication. Because they derive from the annotation-based
 * implementations, you can add missing system calls by extending one and adding annotated methods
 * as needed.
 * 
 * <p>
 * For demonstration, this will implement one from scratch for no particular operating system, but
 * it will borrow many conventions from Linux-amd64.
 */
public class DemoSyscallLibrary extends AnnotatedEmuSyscallUseropLibrary<byte[]> {
	private final static Charset UTF8 = Charset.forName("utf8");

	// Implement all the required plumbing first:

	/**
	 * An exception type for "user errors." These errors should be communicated back to the target
	 * program rather than causing the emulator to interrupt. This is a bare minimum implementation.
	 * In practice more information should be communicated internally, in case things go further
	 * wrong. Also, a hierarchy of exceptions may be appropriate.
	 */
	static class UserError extends PcodeExecutionException {
		private final int errno;

		public UserError(int errno) {
			super("errno: " + errno);
			this.errno = errno;
		}
	}

	private final Register regRAX;
	private final GhidraScript script;

	/**
	 * Because the system call numbering is derived from the "syscall" overlay on OTHER space, a
	 * program is required. Use the system call analyzer on your program to populate this space. The
	 * program and its compiler spec are also used to derive (what it can of) the system call ABI.
	 * Notably, it applies the calling convention of the functions placed in syscall overlay. Those
	 * parts which cannot (yet) be derived from the program are instead implemented as abstract
	 * methods of this class, e.g., {@link #readSyscallNumber(PcodeExecutorStatePiece)} and
	 * {@link #handleError(PcodeExecutor, PcodeExecutionException)}.
	 * 
	 * @param machine the emulator
	 * @param program the program being emulated
	 */
	public DemoSyscallLibrary(PcodeMachine<byte[]> machine, Program program, GhidraScript script) {
		super(machine, program);
		this.script = script;
		this.regRAX = machine.getLanguage().getRegister("RAX");
		if (regRAX == null) {
			throw new AssertionError("This library only works on x64 targets");
		}
	}

	/**
	 * {@inheritDoc}
	 * 
	 * The dispatcher doesn't know where the system call number is stored. It relies on this method
	 * to read that number from the state. Here we'll assume the target is x64 and RAX contains the
	 * syscall number.
	 */
	@Override
	public long readSyscallNumber(PcodeExecutorState<byte[]> state, Reason reason) {
		return Utils.bytesToLong(state.getVar(regRAX, reason), regRAX.getNumBytes(),
			machine.getLanguage().isBigEndian());
	}

	/**
	 * If the error is a user error, put the errno into the machine as expected by the target
	 * program. Here we negate the errno and put it into RAX. If it's not a user error, we return
	 * false letting the dispatcher know it should interrupt the emulator.
	 */
	@Override
	public boolean handleError(PcodeExecutor<byte[]> executor, PcodeExecutionException err) {
		if (err instanceof UserError) {
			executor.getState()
					.setVar(regRAX, executor.getArithmetic()
							.fromConst(-((UserError) err).errno, regRAX.getNumBytes()));
			return true;
		}
		return false;
	}

	/**
	 * Support for Structured Sleigh is built-in. To enable it, override this method and instantiate
	 * the appropriate (usually nested) class.
	 */
	@Override
	protected StructuredPart newStructuredPart() {
		return new DemoStructuredPart();
	}

	@Override
	protected Collection<DataTypeManager> getAdditionalArchives() {
		// Add platform-specific data type archives, if needed
		return super.getAdditionalArchives();
	}

	// Now, implement some system calls!

	// First, a Java callback example

	/**
	 * Write a buffer of utf-8 characters to the console
	 * 
	 * <p>
	 * The {@link EmuSyscall} annotation allows us to specify the system call name, because the
	 * userop name should be prefixed with the platform name, to avoid naming collisions among
	 * composed libraries.
	 * 
	 * <p>
	 * For demonstration, we will export this as a system call, though that is not required for
	 * {@link DemoStructuredPart#demo_console(StructuredSleigh.Var)} to invoke it. It does need to
	 * be a userop, but it doesn't need to be a syscall.
	 * 
	 * @param str a pointer to the start of the buffer
	 * @param end a pointer to the end (exclusive) of the buffer
	 */
	@PcodeUserop
	@EmuSyscall("write")
	public void demo_write(@OpExecutor PcodeExecutor<byte[]> executor, byte[] str, byte[] end) {
		AddressSpace space = machine.getLanguage().getDefaultSpace();
		/**
		 * Because we have concrete {@code byte[]}, we could use Utils.bytesToLong, but for
		 * demonstration, here's how it can be done if we extended
		 * {@link AnnotatedEmuSyscallUseropLibrary}{@code <T>} instead. If the value cannot be made
		 * concrete, an exception will be thrown. For abstract types, it's a good idea to save a
		 * copy of the arithmetic as a field at library construction time.
		 */
		PcodeArithmetic<byte[]> arithmetic = machine.getArithmetic();
		long strLong = arithmetic.toLong(str, Purpose.LOAD);
		long endLong = arithmetic.toLong(end, Purpose.OTHER);

		byte[] stringBytes = machine.getSharedState()
				.getVar(space, strLong, (int) (endLong - strLong), true, executor.getReason());
		String string = new String(stringBytes, UTF8);
		script.println(string);
	}

	// Second, a Structured Sleigh example

	/**
	 * The nested class for syscalls implemented using Structured Sleigh. Note that no matter the
	 * implementation type, the Java method is annotated with {@link EmuSyscall}. We declare the
	 * class public so that the annotation processor can access the methods. Alternatively, we could
	 * override {@link #getMethodLookup()} to provide the processor private access.
	 */
	public class DemoStructuredPart extends StructuredPart {
		/**
		 * This creates a handle to the "demo_write" p-code userop for use in Structured Sleigh.
		 * Otherwise, there's no way to refer to the userop. Think of it like a "forward" or
		 * "external" declaration.
		 */
		UseropDecl write = userop(type("void"), "demo_write", types("char *", "char *"));

		/**
		 * Write a C-style string to the console
		 * 
		 * @param str the null-terminated utf-8 string
		 */
		@StructuredUserop
		@EmuSyscall("console")
		public void demo_console(@Param(type = "char *") Var str) {
			// Measure the string's length and then invoke write
			Var end = local("end", type("char *"));
			_for(end.set(str), end.deref().neq(0), end.inc(), () -> {
			});
			write.call(str, end);
		}
	}
}