diff options
author | Paul Cercueil | 2020-01-30 12:34:17 -0300 |
---|---|---|
committer | Paul Cercueil | 2020-02-08 11:44:52 -0300 |
commit | d16005f843cd28ae90f091bd4e39a90b355e1d45 (patch) | |
tree | ed765396a7abf5cf7cff244397c19d7b7b0a4dbe /deps/lightrec | |
parent | 4a71579b757d3a2eb6902c84391f429838ad4912 (diff) | |
download | pcsx_rearmed-d16005f843cd28ae90f091bd4e39a90b355e1d45.tar.gz pcsx_rearmed-d16005f843cd28ae90f091bd4e39a90b355e1d45.tar.bz2 pcsx_rearmed-d16005f843cd28ae90f091bd4e39a90b355e1d45.zip |
git subrepo clone https://github.com/pcercuei/lightrec.git deps/lightrec
subrepo:
subdir: "deps/lightrec"
merged: "6c69e10"
upstream:
origin: "https://github.com/pcercuei/lightrec.git"
branch: "master"
commit: "6c69e10"
git-subrepo:
version: "0.4.1"
origin: "https://github.com/ingydotnet/git-subrepo.git"
commit: "a04d8c2"
Diffstat (limited to 'deps/lightrec')
26 files changed, 7744 insertions, 0 deletions
diff --git a/deps/lightrec/.gitignore b/deps/lightrec/.gitignore new file mode 100644 index 0000000..bae14b5 --- /dev/null +++ b/deps/lightrec/.gitignore @@ -0,0 +1,2 @@ +*.o +*.so* diff --git a/deps/lightrec/.gitrepo b/deps/lightrec/.gitrepo new file mode 100644 index 0000000..871f638 --- /dev/null +++ b/deps/lightrec/.gitrepo @@ -0,0 +1,12 @@ +; DO NOT EDIT (unless you know what you are doing) +; +; This subdirectory is a git "subrepo", and this file is maintained by the +; git-subrepo command. See https://github.com/git-commands/git-subrepo#readme +; +[subrepo] + remote = https://github.com/pcercuei/lightrec.git + branch = master + commit = 6c69e104d0827e45b8c094d6a61f95c96e9efb15 + parent = b7ee664796db949b417754d11d4ae405cf5144a5 + method = merge + cmdver = 0.4.1 diff --git a/deps/lightrec/CMakeLists.txt b/deps/lightrec/CMakeLists.txt new file mode 100644 index 0000000..6ac5cd4 --- /dev/null +++ b/deps/lightrec/CMakeLists.txt @@ -0,0 +1,119 @@ +cmake_minimum_required(VERSION 3.0) +project(lightrec LANGUAGES C VERSION 0.3) + +set(BUILD_SHARED_LIBS ON CACHE BOOL "Build shared libraries") +if (NOT BUILD_SHARED_LIBS) + add_definitions(-DLIGHTREC_STATIC) +endif (NOT BUILD_SHARED_LIBS) + +if (NOT LOG_LEVEL) + set(LOG_LEVEL Info CACHE STRING "Log level" FORCE) + set_property(CACHE LOG_LEVEL PROPERTY STRINGS NoLog Error Warning Info Debug) +endif() + +if (NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE RelWithDebInfo CACHE STRING + "Choose the type of build, options are: None(CMAKE_CXX_FLAGS or CMAKE_C_FLAGS used) Debug Release RelWithDebInfo MinSizeRel." 
+ FORCE) + set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS None Debug Release RelWithDebInfo MinSizeRel) +endif() + +string(TOUPPER ${LOG_LEVEL} LIGHTREC_LOG_LEVEL) +add_definitions(-DLOG_LEVEL=${LIGHTREC_LOG_LEVEL}_L) + +if (CMAKE_COMPILER_IS_GNUCC) + add_compile_options(-fvisibility=hidden) +endif() + +list(APPEND LIGHTREC_SOURCES + blockcache.c + disassembler.c + emitter.c + interpreter.c + lightrec.c + memmanager.c + optimizer.c + regcache.c +) +list(APPEND LIGHTREC_HEADERS + blockcache.h + debug.h + disassembler.h + emitter.h + interpreter.h + lightrec-private.h + lightrec.h + memmanager.h + optimizer.h + recompiler.h + regcache.h +) + +option(ENABLE_FIRST_PASS "Run the interpreter as first-pass optimization" ON) + +option(ENABLE_THREADED_COMPILER "Enable threaded compiler" ON) +if (ENABLE_THREADED_COMPILER) + list(APPEND LIGHTREC_SOURCES recompiler.c) + + if (NOT ENABLE_FIRST_PASS) + message(SEND_ERROR "Threaded compiler requires first-pass optimization") + endif (NOT ENABLE_FIRST_PASS) +endif (ENABLE_THREADED_COMPILER) + +include_directories(${CMAKE_CURRENT_BINARY_DIR}) + +add_library(${PROJECT_NAME} ${LIGHTREC_SOURCES} ${LIGHTREC_HEADERS}) +set_target_properties(${PROJECT_NAME} PROPERTIES + VERSION ${PROJECT_VERSION} + SOVERSION ${PROJECT_VERSION_MAJOR} + PUBLIC_HEADER lightrec.h + C_STANDARD 11 + C_STANDARD_REQUIRED ON + C_EXTENSIONS OFF +) + +option(ENABLE_TINYMM "Enable optional libtinymm dependency" OFF) +if (ENABLE_TINYMM) + find_library(TINYMM_LIBRARIES tinymm REQUIRED) + find_path(TINYMM_INCLUDE_DIR tinymm.h REQUIRED) + + include_directories(${TINYMM_INCLUDE_DIR}) + target_link_libraries(${PROJECT_NAME} PRIVATE ${TINYMM_LIBRARIES}) +endif (ENABLE_TINYMM) + +if (ENABLE_THREADED_COMPILER) + find_library(PTHREAD_LIBRARIES pthread REQUIRED) + find_path(PTHREAD_INCLUDE_DIR pthread.h REQUIRED) + + include_directories(${PTHREAD_INCLUDE_DIR}) + target_link_libraries(${PROJECT_NAME} PRIVATE ${PTHREAD_LIBRARIES}) +endif (ENABLE_THREADED_COMPILER) + 
+find_library(LIBLIGHTNING lightning REQUIRED) +find_path(LIBLIGHTNING_INCLUDE_DIR lightning.h REQUIRED) + +include_directories(${LIBLIGHTNING_INCLUDE_DIR}) +target_link_libraries(${PROJECT_NAME} PRIVATE ${LIBLIGHTNING}) + +if (LOG_LEVEL STREQUAL Debug) + find_library(LIBOPCODES NAMES opcodes-multiarch opcodes) + find_path(LIBOPCODES_INCLUDE_DIR dis-asm.h) + + if (NOT LIBOPCODES OR NOT LIBOPCODES_INCLUDE_DIR) + message(SEND_ERROR "Debug log level requires libopcodes (from binutils) to be installed.") + endif () + + set(ENABLE_DISASSEMBLER ON) + include_directories(${LIBOPCODES_INCLUDE_DIR}) + target_link_libraries(${PROJECT_NAME} PRIVATE ${LIBOPCODES}) +endif() + +configure_file(config.h.cmakein config.h @ONLY) + +include(GNUInstallDirs) +install(TARGETS ${PROJECT_NAME} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} +) diff --git a/deps/lightrec/COPYING b/deps/lightrec/COPYING new file mode 100644 index 0000000..161a3d1 --- /dev/null +++ b/deps/lightrec/COPYING @@ -0,0 +1,482 @@ + GNU LIBRARY GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1991 Free Software Foundation, Inc. + 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + +[This is the first released version of the library GPL. It is + numbered 2 because it goes with version 2 of the ordinary GPL.] + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +Licenses are intended to guarantee your freedom to share and change +free software--to make sure the software is free for all its users. 
+ + This license, the Library General Public License, applies to some +specially designated Free Software Foundation software, and to any +other libraries whose authors decide to use it. You can use it for +your libraries, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if +you distribute copies of the library, or if you modify it. + + For example, if you distribute copies of the library, whether gratis +or for a fee, you must give the recipients all the rights that we gave +you. You must make sure that they, too, receive or can get the source +code. If you link a program with the library, you must provide +complete object files to the recipients so that they can relink them +with the library, after making changes to the library and recompiling +it. And you must show them these terms so they know their rights. + + Our method of protecting your rights has two steps: (1) copyright +the library, and (2) offer you this license which gives you legal +permission to copy, distribute and/or modify the library. + + Also, for each distributor's protection, we want to make certain +that everyone understands that there is no warranty for this free +library. If the library is modified by someone else and passed on, we +want its recipients to know that what they have is not the original +version, so that any problems introduced by others will not reflect on +the original authors' reputations. 
+ + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that companies distributing free +software will individually obtain patent licenses, thus in effect +transforming the program into proprietary software. To prevent this, +we have made it clear that any patent must be licensed for everyone's +free use or not licensed at all. + + Most GNU software, including some libraries, is covered by the ordinary +GNU General Public License, which was designed for utility programs. This +license, the GNU Library General Public License, applies to certain +designated libraries. This license is quite different from the ordinary +one; be sure to read it in full, and don't assume that anything in it is +the same as in the ordinary license. + + The reason we have a separate public license for some libraries is that +they blur the distinction we usually make between modifying or adding to a +program and simply using it. Linking a program with a library, without +changing the library, is in some sense simply using the library, and is +analogous to running a utility program or application program. However, in +a textual and legal sense, the linked executable is a combined work, a +derivative of the original library, and the ordinary General Public License +treats it as such. + + Because of this blurred distinction, using the ordinary General +Public License for libraries did not effectively promote software +sharing, because most developers did not use the libraries. We +concluded that weaker conditions might promote sharing better. + + However, unrestricted linking of non-free programs would deprive the +users of those programs of all benefit from the free status of the +libraries themselves. This Library General Public License is intended to +permit developers of non-free programs to use free libraries, while +preserving your freedom as a user of such programs to change the free +libraries that are incorporated in them. 
(We have not seen how to achieve +this as regards changes in header files, but we have achieved it as regards +changes in the actual functions of the Library.) The hope is that this +will lead to faster development of free libraries. + + The precise terms and conditions for copying, distribution and +modification follow. Pay close attention to the difference between a +"work based on the library" and a "work that uses the library". The +former contains code derived from the library, while the latter only +works together with the library. + + Note that it is possible for a library to be covered by the ordinary +General Public License rather than by this special one. + + GNU LIBRARY GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License Agreement applies to any software library which +contains a notice placed by the copyright holder or other authorized +party saying it may be distributed under the terms of this Library +General Public License (also called "this License"). Each licensee is +addressed as "you". + + A "library" means a collection of software functions and/or data +prepared so as to be conveniently linked with application programs +(which use some of those functions and data) to form executables. + + The "Library", below, refers to any such software library or work +which has been distributed under these terms. A "work based on the +Library" means either the Library or any derivative work under +copyright law: that is to say, a work containing the Library or a +portion of it, either verbatim or with modifications and/or translated +straightforwardly into another language. (Hereinafter, translation is +included without limitation in the term "modification".) + + "Source code" for a work means the preferred form of the work for +making modifications to it. 
For a library, complete source code means +all the source code for all modules it contains, plus any associated +interface definition files, plus the scripts used to control compilation +and installation of the library. + + Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running a program using the Library is not restricted, and output from +such a program is covered only if its contents constitute a work based +on the Library (independent of the use of the Library in a tool for +writing it). Whether that is true depends on what the Library does +and what the program that uses the Library does. + + 1. You may copy and distribute verbatim copies of the Library's +complete source code as you receive it, in any medium, provided that +you conspicuously and appropriately publish on each copy an +appropriate copyright notice and disclaimer of warranty; keep intact +all the notices that refer to this License and to the absence of any +warranty; and distribute a copy of this License along with the +Library. + + You may charge a fee for the physical act of transferring a copy, +and you may at your option offer warranty protection in exchange for a +fee. + + 2. You may modify your copy or copies of the Library or any portion +of it, thus forming a work based on the Library, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) The modified work must itself be a software library. + + b) You must cause the files modified to carry prominent notices + stating that you changed the files and the date of any change. + + c) You must cause the whole of the work to be licensed at no + charge to all third parties under the terms of this License. 
+ + d) If a facility in the modified Library refers to a function or a + table of data to be supplied by an application program that uses + the facility, other than as an argument passed when the facility + is invoked, then you must make a good faith effort to ensure that, + in the event an application does not supply such function or + table, the facility still operates, and performs whatever part of + its purpose remains meaningful. + + (For example, a function in a library to compute square roots has + a purpose that is entirely well-defined independent of the + application. Therefore, Subsection 2d requires that any + application-supplied function or table used by this function must + be optional: if the application does not supply it, the square + root function must still compute square roots.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Library, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Library, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote +it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Library. + +In addition, mere aggregation of another work not based on the Library +with the Library (or with a work based on the Library) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. 
You may opt to apply the terms of the ordinary GNU General Public +License instead of this License to a given copy of the Library. To do +this, you must alter all the notices that refer to this License, so +that they refer to the ordinary GNU General Public License, version 2, +instead of to this License. (If a newer version than version 2 of the +ordinary GNU General Public License has appeared, then you can specify +that version instead if you wish.) Do not make any other change in +these notices. + + Once this change is made in a given copy, it is irreversible for +that copy, so the ordinary GNU General Public License applies to all +subsequent copies and derivative works made from that copy. + + This option is useful when you wish to copy part of the code of +the Library into a program that is not a library. + + 4. You may copy and distribute the Library (or a portion or +derivative of it, under Section 2) in object code or executable form +under the terms of Sections 1 and 2 above provided that you accompany +it with the complete corresponding machine-readable source code, which +must be distributed under the terms of Sections 1 and 2 above on a +medium customarily used for software interchange. + + If distribution of object code is made by offering access to copy +from a designated place, then offering equivalent access to copy the +source code from the same place satisfies the requirement to +distribute the source code, even though third parties are not +compelled to copy the source along with the object code. + + 5. A program that contains no derivative of any portion of the +Library, but is designed to work with the Library by being compiled or +linked with it, is called a "work that uses the Library". Such a +work, in isolation, is not a derivative work of the Library, and +therefore falls outside the scope of this License. 
+ + However, linking a "work that uses the Library" with the Library +creates an executable that is a derivative of the Library (because it +contains portions of the Library), rather than a "work that uses the +library". The executable is therefore covered by this License. +Section 6 states terms for distribution of such executables. + + When a "work that uses the Library" uses material from a header file +that is part of the Library, the object code for the work may be a +derivative work of the Library even though the source code is not. +Whether this is true is especially significant if the work can be +linked without the Library, or if the work is itself a library. The +threshold for this to be true is not precisely defined by law. + + If such an object file uses only numerical parameters, data +structure layouts and accessors, and small macros and small inline +functions (ten lines or less in length), then the use of the object +file is unrestricted, regardless of whether it is legally a derivative +work. (Executables containing this object code plus portions of the +Library will still fall under Section 6.) + + Otherwise, if the work is a derivative of the Library, you may +distribute the object code for the work under the terms of Section 6. +Any executables containing that work also fall under Section 6, +whether or not they are linked directly with the Library itself. + + 6. As an exception to the Sections above, you may also compile or +link a "work that uses the Library" with the Library to produce a +work containing portions of the Library, and distribute that work +under terms of your choice, provided that the terms permit +modification of the work for the customer's own use and reverse +engineering for debugging such modifications. + + You must give prominent notice with each copy of the work that the +Library is used in it and that the Library and its use are covered by +this License. You must supply a copy of this License. 
If the work +during execution displays copyright notices, you must include the +copyright notice for the Library among them, as well as a reference +directing the user to the copy of this License. Also, you must do one +of these things: + + a) Accompany the work with the complete corresponding + machine-readable source code for the Library including whatever + changes were used in the work (which must be distributed under + Sections 1 and 2 above); and, if the work is an executable linked + with the Library, with the complete machine-readable "work that + uses the Library", as object code and/or source code, so that the + user can modify the Library and then relink to produce a modified + executable containing the modified Library. (It is understood + that the user who changes the contents of definitions files in the + Library will not necessarily be able to recompile the application + to use the modified definitions.) + + b) Accompany the work with a written offer, valid for at + least three years, to give the same user the materials + specified in Subsection 6a, above, for a charge no more + than the cost of performing this distribution. + + c) If distribution of the work is made by offering access to copy + from a designated place, offer equivalent access to copy the above + specified materials from the same place. + + d) Verify that the user has already received a copy of these + materials or that you have already sent this user a copy. + + For an executable, the required form of the "work that uses the +Library" must include any data and utility programs needed for +reproducing the executable from it. However, as a special exception, +the source code distributed need not include anything that is normally +distributed (in either source or binary form) with the major +components (compiler, kernel, and so on) of the operating system on +which the executable runs, unless that component itself accompanies +the executable. 
+ + It may happen that this requirement contradicts the license +restrictions of other proprietary libraries that do not normally +accompany the operating system. Such a contradiction means you cannot +use both them and the Library together in an executable that you +distribute. + + 7. You may place library facilities that are a work based on the +Library side-by-side in a single library together with other library +facilities not covered by this License, and distribute such a combined +library, provided that the separate distribution of the work based on +the Library and of the other library facilities is otherwise +permitted, and provided that you do these two things: + + a) Accompany the combined library with a copy of the same work + based on the Library, uncombined with any other library + facilities. This must be distributed under the terms of the + Sections above. + + b) Give prominent notice with the combined library of the fact + that part of it is a work based on the Library, and explaining + where to find the accompanying uncombined form of the same work. + + 8. You may not copy, modify, sublicense, link with, or distribute +the Library except as expressly provided under this License. Any +attempt otherwise to copy, modify, sublicense, link with, or +distribute the Library is void, and will automatically terminate your +rights under this License. However, parties who have received copies, +or rights, from you under this License will not have their licenses +terminated so long as such parties remain in full compliance. + + 9. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Library or its derivative works. These actions are +prohibited by law if you do not accept this License. 
Therefore, by +modifying or distributing the Library (or any work based on the +Library), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Library or works based on it. + + 10. Each time you redistribute the Library (or any work based on the +Library), the recipient automatically receives a license from the +original licensor to copy, distribute, link with or modify the Library +subject to these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 11. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Library at all. For example, if a patent +license would not permit royalty-free redistribution of the Library by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Library. + +If any portion of this section is held invalid or unenforceable under any +particular circumstance, the balance of the section is intended to apply, +and the section as a whole is intended to apply in other circumstances. 
+ +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 12. If the distribution and/or use of the Library is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Library under this License may add +an explicit geographical distribution limitation excluding those countries, +so that distribution is permitted only in or among countries not thus +excluded. In such case, this License incorporates the limitation as if +written in the body of this License. + + 13. The Free Software Foundation may publish revised and/or new +versions of the Library General Public License from time to time. +Such new versions will be similar in spirit to the present version, +but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Library +specifies a version number of this License which applies to it and +"any later version", you have the option of following the terms and +conditions either of that version or of any later version published by +the Free Software Foundation. If the Library does not specify a +license version number, you may choose any version ever published by +the Free Software Foundation. + + 14. 
If you wish to incorporate parts of the Library into other free +programs whose distribution conditions are incompatible with these, +write to the author to ask for permission. For software which is +copyrighted by the Free Software Foundation, write to the Free +Software Foundation; we sometimes make exceptions for this. Our +decision will be guided by the two goals of preserving the free status +of all derivatives of our free software and of promoting the sharing +and reuse of software generally. + + NO WARRANTY + + 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO +WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. +EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR +OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY +KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE +LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN +WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY +AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU +FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE +LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING +RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A +FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF +SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. 
+ + END OF TERMS AND CONDITIONS + + Appendix: How to Apply These Terms to Your New Libraries + + If you develop a new library, and you want it to be of the greatest +possible use to the public, we recommend making it free software that +everyone can redistribute and change. You can do so by permitting +redistribution under these terms (or, alternatively, under the terms of the +ordinary General Public License). + + To apply these terms, attach the following notices to the library. It is +safest to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least the +"copyright" line and a pointer to where the full notice is found. + + <one line to give the library's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with this library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + MA 02111-1307, USA + +Also add information on how to contact you by electronic and paper mail. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the library, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the + library `Frob' (a library for tweaking knobs) written by James Random Hacker. 
+ + <signature of Ty Coon>, 1 April 1990 + Ty Coon, President of Vice + +That's all there is to it! diff --git a/deps/lightrec/README b/deps/lightrec/README new file mode 100644 index 0000000..5bc4627 --- /dev/null +++ b/deps/lightrec/README @@ -0,0 +1 @@ +LightRec is my attempt at creating a dynamic recompiler for MIPS and powered by GNU Lightning. diff --git a/deps/lightrec/blockcache.c b/deps/lightrec/blockcache.c new file mode 100644 index 0000000..833a8e1 --- /dev/null +++ b/deps/lightrec/blockcache.c @@ -0,0 +1,180 @@ +/* + * Copyright (C) 2015-2020 Paul Cercueil <paul@crapouillou.net> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ */ + +#include "blockcache.h" +#include "debug.h" +#include "lightrec-private.h" +#include "memmanager.h" + +#include <stdbool.h> +#include <stdlib.h> + +/* Must be power of two */ +#define LUT_SIZE 0x4000 + +struct blockcache { + struct lightrec_state *state; + struct block * lut[LUT_SIZE]; +}; + +struct block * lightrec_find_block(struct blockcache *cache, u32 pc) +{ + struct block *block; + + pc = kunseg(pc); + + for (block = cache->lut[(pc >> 2) & (LUT_SIZE - 1)]; + block; block = block->next) + if (kunseg(block->pc) == pc) + return block; + + return NULL; +} + +void remove_from_code_lut(struct blockcache *cache, struct block *block) +{ + struct lightrec_state *state = block->state; + const struct opcode *op; + u32 offset = lut_offset(block->pc); + + /* Use state->get_next_block in the code LUT, which basically + * calls back get_next_block_func(), until the compiler + * overrides this. This is required, as a NULL value in the code + * LUT means an outdated block. */ + state->code_lut[offset] = state->get_next_block; + + for (op = block->opcode_list; op; op = op->next) + if (op->c.i.op == OP_META_SYNC) + state->code_lut[offset + op->offset] = NULL; + +} + +void lightrec_mark_for_recompilation(struct blockcache *cache, + struct block *block) +{ + block->flags |= BLOCK_SHOULD_RECOMPILE; +} + +void lightrec_register_block(struct blockcache *cache, struct block *block) +{ + u32 pc = kunseg(block->pc); + struct block *old; + + old = cache->lut[(pc >> 2) & (LUT_SIZE - 1)]; + if (old) + block->next = old; + + cache->lut[(pc >> 2) & (LUT_SIZE - 1)] = block; + + remove_from_code_lut(cache, block); +} + +void lightrec_unregister_block(struct blockcache *cache, struct block *block) +{ + u32 pc = kunseg(block->pc); + struct block *old = cache->lut[(pc >> 2) & (LUT_SIZE - 1)]; + + remove_from_code_lut(cache, block); + + if (old == block) { + cache->lut[(pc >> 2) & (LUT_SIZE - 1)] = old->next; + return; + } + + for (; old; old = old->next) { + if (old->next == block) { + 
old->next = block->next; + return; + } + } + + pr_err("Block at PC 0x%x is not in cache\n", block->pc); +} + +void lightrec_free_block_cache(struct blockcache *cache) +{ + struct block *block, *next; + unsigned int i; + + for (i = 0; i < LUT_SIZE; i++) { + for (block = cache->lut[i]; block; block = next) { + next = block->next; + lightrec_free_block(block); + } + } + + lightrec_free(cache->state, MEM_FOR_LIGHTREC, sizeof(*cache), cache); +} + +struct blockcache * lightrec_blockcache_init(struct lightrec_state *state) +{ + struct blockcache *cache; + + cache = lightrec_calloc(state, MEM_FOR_LIGHTREC, sizeof(*cache)); + if (!cache) + return NULL; + + cache->state = state; + + return cache; +} + +u32 lightrec_calculate_block_hash(const struct block *block) +{ + const struct lightrec_mem_map *map = block->map; + u32 pc, hash = 0xffffffff; + const u32 *code; + unsigned int i; + + pc = kunseg(block->pc) - map->pc; + + while (map->mirror_of) + map = map->mirror_of; + + code = map->address + pc; + + /* Jenkins one-at-a-time hash algorithm */ + for (i = 0; i < block->nb_ops; i++) { + hash += *code++; + hash += (hash << 10); + hash ^= (hash >> 6); + } + + hash += (hash << 3); + hash ^= (hash >> 11); + hash += (hash << 15); + + return hash; +} + +bool lightrec_block_is_outdated(struct block *block) +{ + void **lut_entry = &block->state->code_lut[lut_offset(block->pc)]; + bool outdated; + + if (*lut_entry) + return false; + + outdated = block->hash != lightrec_calculate_block_hash(block); + if (likely(!outdated)) { + /* The block was marked as outdated, but the content is still + * the same */ + if (block->function) + *lut_entry = block->function; + else + *lut_entry = block->state->get_next_block; + } + + return outdated; +} diff --git a/deps/lightrec/blockcache.h b/deps/lightrec/blockcache.h new file mode 100644 index 0000000..0c57ffc --- /dev/null +++ b/deps/lightrec/blockcache.h @@ -0,0 +1,35 @@ +/* + * Copyright (C) 2014 Paul Cercueil <paul@crapouillou.net> + * + * This 
library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + */ + +#ifndef __BLOCKCACHE_H__ +#define __BLOCKCACHE_H__ + +#include "lightrec.h" + +struct blockcache; + +struct block * lightrec_find_block(struct blockcache *cache, u32 pc); +void lightrec_register_block(struct blockcache *cache, struct block *block); +void lightrec_unregister_block(struct blockcache *cache, struct block *block); + +struct blockcache * lightrec_blockcache_init(struct lightrec_state *state); +void lightrec_free_block_cache(struct blockcache *cache); + +u32 lightrec_calculate_block_hash(const struct block *block); +_Bool lightrec_block_is_outdated(struct block *block); + +void lightrec_mark_for_recompilation(struct blockcache *cache, + struct block *block); + +#endif /* __BLOCKCACHE_H__ */ diff --git a/deps/lightrec/config.h.cmakein b/deps/lightrec/config.h.cmakein new file mode 100644 index 0000000..1eac007 --- /dev/null +++ b/deps/lightrec/config.h.cmakein @@ -0,0 +1,24 @@ +/* + * Copyright (C) 2019 Paul Cercueil <paul@crapouillou.net> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + */ + +#ifndef __LIGHTREC_CONFIG_H__ +#define __LIGHTREC_CONFIG_H__ + +#cmakedefine01 ENABLE_THREADED_COMPILER +#cmakedefine01 ENABLE_FIRST_PASS +#cmakedefine01 ENABLE_DISASSEMBLER +#cmakedefine01 ENABLE_TINYMM + +#endif /* __LIGHTREC_CONFIG_H__ */ + diff --git a/deps/lightrec/debug.h b/deps/lightrec/debug.h new file mode 100644 index 0000000..4048d43 --- /dev/null +++ b/deps/lightrec/debug.h @@ -0,0 +1,108 @@ +/* + * Copyright (C) 2014 Paul Cercueil <paul@crapouillou.net> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + */ + +#ifndef DEBUG_H +#define DEBUG_H + +#include <stdio.h> +#include <unistd.h> + +#define NOLOG_L 0 +#define ERROR_L 1 +#define WARNING_L 2 +#define INFO_L 3 +#define DEBUG_L 4 + +#ifndef LOG_LEVEL +#define LOG_LEVEL INFO_L +#endif + +// ------------- + +#ifndef COLOR_DEBUG +#define COLOR_DEBUG "\e[0;32m" +#endif +#ifndef COLOR_WARNING +#define COLOR_WARNING "\e[01;35m" +#endif +#ifndef COLOR_ERROR +#define COLOR_ERROR "\e[01;31m" +#endif + +#define COLOR_END "\e[0m" + +#if (LOG_LEVEL >= DEBUG_L) +# ifdef COLOR_DEBUG +# define pr_debug(str, ...) do { \ + if (isatty(STDOUT_FILENO)) \ + fprintf(stdout, COLOR_DEBUG "DEBUG: " str COLOR_END, \ + ##__VA_ARGS__); \ + else \ + fprintf(stdout, "DEBUG: " str, ##__VA_ARGS__); \ + } while (0) +# else +# define pr_debug(...) \ + fprintf(stdout, "DEBUG: " __VA_ARGS__) +# endif +#else +#define pr_debug(...) 
+#endif + +#if (LOG_LEVEL >= INFO_L) +# ifdef COLOR_INFO +# define pr_info(str, ...) \ + fprintf(stdout, COLOR_INFO str COLOR_END, ##__VA_ARGS__) +# else +# define pr_info(...) \ + fprintf(stdout, __VA_ARGS__) +# endif +#else +#define pr_info(...) +#endif + +#if (LOG_LEVEL >= WARNING_L) +# ifdef COLOR_WARNING +# define pr_warn(str, ...) do { \ + if (isatty(STDERR_FILENO)) \ + fprintf(stderr, COLOR_WARNING "WARNING: " str COLOR_END,\ + ##__VA_ARGS__); \ + else \ + fprintf(stderr, "WARNING: " str, ##__VA_ARGS__); \ + } while (0) +# else +# define pr_warn(...) \ + fprintf(stderr, "WARNING: " __VA_ARGS__) +# endif +#else +#define pr_warn(...) +#endif + +#if (LOG_LEVEL >= ERROR_L) +# ifdef COLOR_ERROR +# define pr_err(str, ...) do { \ + if (isatty(STDERR_FILENO)) \ + fprintf(stderr, COLOR_ERROR "ERROR: " str COLOR_END, \ + ##__VA_ARGS__); \ + else \ + fprintf(stderr, "ERROR: " str, ##__VA_ARGS__); \ + } while (0) +# else +# define pr_err(...) \ + fprintf(stderr, "ERROR: " __VA_ARGS__) +# endif +#else +#define pr_err(...) +#endif + +#endif diff --git a/deps/lightrec/disassembler.c b/deps/lightrec/disassembler.c new file mode 100644 index 0000000..06fcec9 --- /dev/null +++ b/deps/lightrec/disassembler.c @@ -0,0 +1,143 @@ +/* + * Copyright (C) 2014-2020 Paul Cercueil <paul@crapouillou.net> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ */ + +#include "config.h" + +#if ENABLE_DISASSEMBLER +#include <dis-asm.h> +#endif +#include <stdbool.h> +#include <stdlib.h> +#include <string.h> + +#include "debug.h" +#include "disassembler.h" +#include "lightrec-private.h" +#include "memmanager.h" + +static bool is_unconditional_jump(const struct opcode *op) +{ + switch (op->i.op) { + case OP_SPECIAL: + return op->r.op == OP_SPECIAL_JR || op->r.op == OP_SPECIAL_JALR; + case OP_J: + case OP_JAL: + return true; + case OP_BEQ: + case OP_BLEZ: + return op->i.rs == op->i.rt; + case OP_REGIMM: + return (op->r.rt == OP_REGIMM_BGEZ || + op->r.rt == OP_REGIMM_BGEZAL) && op->i.rs == 0; + default: + return false; + } +} + +static bool is_syscall(const struct opcode *op) +{ + return (op->i.op == OP_SPECIAL && (op->r.op == OP_SPECIAL_SYSCALL || + op->r.op == OP_SPECIAL_BREAK)) || + (op->i.op == OP_CP0 && (op->r.rs == OP_CP0_MTC0 || + op->r.rs == OP_CP0_CTC0) && + (op->r.rd == 12 || op->r.rd == 13)); +} + +void lightrec_free_opcode_list(struct lightrec_state *state, struct opcode *list) +{ + struct opcode *next; + + while (list) { + next = list->next; + lightrec_free(state, MEM_FOR_IR, sizeof(*list), list); + list = next; + } +} + +struct opcode * lightrec_disassemble(struct lightrec_state *state, + const u32 *src, unsigned int *len) +{ + struct opcode *head = NULL; + bool stop_next = false; + struct opcode *curr, *last; + unsigned int i; + + for (i = 0, last = NULL; ; i++, last = curr) { + curr = lightrec_calloc(state, MEM_FOR_IR, sizeof(*curr)); + if (!curr) { + pr_err("Unable to allocate memory\n"); + lightrec_free_opcode_list(state, head); + return NULL; + } + + if (!last) + head = curr; + else + last->next = curr; + + /* TODO: Take care of endianness */ + curr->opcode = LE32TOH(*src++); + curr->offset = i; + + /* NOTE: The block disassembly ends after the opcode that + * follows an unconditional jump (delay slot) */ + if (stop_next || is_syscall(curr)) + break; + else if (is_unconditional_jump(curr)) + stop_next = 
true; + } + + if (len) + *len = (i + 1) * sizeof(u32); + + return head; +} + +unsigned int lightrec_cycles_of_opcode(union code code) +{ + switch (code.i.op) { + case OP_META_REG_UNLOAD: + case OP_META_SYNC: + return 0; + default: + return 2; + } +} + +#if ENABLE_DISASSEMBLER +void lightrec_print_disassembly(const struct block *block, + const u32 *code, unsigned int length) +{ + struct disassemble_info info; + unsigned int i; + + memset(&info, 0, sizeof(info)); + init_disassemble_info(&info, stdout, (fprintf_ftype) fprintf); + + info.buffer = (bfd_byte *) code; + info.buffer_vma = (bfd_vma)(uintptr_t) code; + info.buffer_length = length; + info.flavour = bfd_target_unknown_flavour; + info.arch = bfd_arch_mips; + info.mach = bfd_mach_mips3000; + disassemble_init_for_target(&info); + + for (i = 0; i < length; i += 4) { + void print_insn_little_mips(bfd_vma, struct disassemble_info *); + putc('\t', stdout); + print_insn_little_mips((bfd_vma)(uintptr_t) code++, &info); + putc('\n', stdout); + } +} +#endif diff --git a/deps/lightrec/disassembler.h b/deps/lightrec/disassembler.h new file mode 100644 index 0000000..e4c4403 --- /dev/null +++ b/deps/lightrec/disassembler.h @@ -0,0 +1,212 @@ +/* + * Copyright (C) 2014 Paul Cercueil <paul@crapouillou.net> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ */ + +#ifndef __DISASSEMBLER_H__ +#define __DISASSEMBLER_H__ + +#include "debug.h" +#include "lightrec.h" + +#ifndef __packed +#define __packed __attribute__((packed)) +#endif + +#define LIGHTREC_DIRECT_IO (1 << 0) +#define LIGHTREC_NO_INVALIDATE (1 << 1) +#define LIGHTREC_NO_DS (1 << 2) +#define LIGHTREC_SMC (1 << 3) +#define LIGHTREC_EMULATE_BRANCH (1 << 4) +#define LIGHTREC_LOCAL_BRANCH (1 << 5) +#define LIGHTREC_HW_IO (1 << 6) +#define LIGHTREC_MULT32 (1 << 7) + +struct block; + +enum standard_opcodes { + OP_SPECIAL = 0x00, + OP_REGIMM = 0x01, + OP_J = 0x02, + OP_JAL = 0x03, + OP_BEQ = 0x04, + OP_BNE = 0x05, + OP_BLEZ = 0x06, + OP_BGTZ = 0x07, + OP_ADDI = 0x08, + OP_ADDIU = 0x09, + OP_SLTI = 0x0a, + OP_SLTIU = 0x0b, + OP_ANDI = 0x0c, + OP_ORI = 0x0d, + OP_XORI = 0x0e, + OP_LUI = 0x0f, + OP_CP0 = 0x10, + OP_CP2 = 0x12, + OP_LB = 0x20, + OP_LH = 0x21, + OP_LWL = 0x22, + OP_LW = 0x23, + OP_LBU = 0x24, + OP_LHU = 0x25, + OP_LWR = 0x26, + OP_SB = 0x28, + OP_SH = 0x29, + OP_SWL = 0x2a, + OP_SW = 0x2b, + OP_SWR = 0x2e, + OP_LWC2 = 0x32, + OP_SWC2 = 0x3a, + + OP_META_REG_UNLOAD = 0x11, + + OP_META_BEQZ = 0x14, + OP_META_BNEZ = 0x15, + + OP_META_MOV = 0x16, + OP_META_SYNC = 0x17, +}; + +enum special_opcodes { + OP_SPECIAL_SLL = 0x00, + OP_SPECIAL_SRL = 0x02, + OP_SPECIAL_SRA = 0x03, + OP_SPECIAL_SLLV = 0x04, + OP_SPECIAL_SRLV = 0x06, + OP_SPECIAL_SRAV = 0x07, + OP_SPECIAL_JR = 0x08, + OP_SPECIAL_JALR = 0x09, + OP_SPECIAL_SYSCALL = 0x0c, + OP_SPECIAL_BREAK = 0x0d, + OP_SPECIAL_MFHI = 0x10, + OP_SPECIAL_MTHI = 0x11, + OP_SPECIAL_MFLO = 0x12, + OP_SPECIAL_MTLO = 0x13, + OP_SPECIAL_MULT = 0x18, + OP_SPECIAL_MULTU = 0x19, + OP_SPECIAL_DIV = 0x1a, + OP_SPECIAL_DIVU = 0x1b, + OP_SPECIAL_ADD = 0x20, + OP_SPECIAL_ADDU = 0x21, + OP_SPECIAL_SUB = 0x22, + OP_SPECIAL_SUBU = 0x23, + OP_SPECIAL_AND = 0x24, + OP_SPECIAL_OR = 0x25, + OP_SPECIAL_XOR = 0x26, + OP_SPECIAL_NOR = 0x27, + OP_SPECIAL_SLT = 0x2a, + OP_SPECIAL_SLTU = 0x2b, +}; + +enum regimm_opcodes { + OP_REGIMM_BLTZ = 0x00, 
+ OP_REGIMM_BGEZ = 0x01, + OP_REGIMM_BLTZAL = 0x10, + OP_REGIMM_BGEZAL = 0x11, +}; + +enum cp0_opcodes { + OP_CP0_MFC0 = 0x00, + OP_CP0_CFC0 = 0x02, + OP_CP0_MTC0 = 0x04, + OP_CP0_CTC0 = 0x06, + OP_CP0_RFE = 0x10, +}; + +enum cp2_opcodes { + OP_CP2_BASIC = 0x00, +}; + +enum cp2_basic_opcodes { + OP_CP2_BASIC_MFC2 = 0x00, + OP_CP2_BASIC_CFC2 = 0x02, + OP_CP2_BASIC_MTC2 = 0x04, + OP_CP2_BASIC_CTC2 = 0x06, +}; + +struct opcode_r { +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + u32 zero :6; + u32 rs :5; + u32 rt :5; + u32 rd :5; + u32 imm :5; + u32 op :6; +#else + u32 op :6; + u32 imm :5; + u32 rd :5; + u32 rt :5; + u32 rs :5; + u32 zero :6; +#endif +} __packed; + +struct opcode_i { +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + u32 op :6; + u32 rs :5; + u32 rt :5; + u32 imm :16; +#else + u32 imm :16; + u32 rt :5; + u32 rs :5; + u32 op :6; +#endif +} __packed; + +struct opcode_j { +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + u32 op :6; + u32 imm :26; +#else + u32 imm :26; + u32 op :6; +#endif +} __packed; + +union code { + /* Keep in sync with struct opcode */ + u32 opcode; + struct opcode_r r; + struct opcode_i i; + struct opcode_j j; +}; + +struct opcode { + /* Keep this union at the first position */ + union { + union code c; + + /* Keep in sync with union code */ + u32 opcode; + struct opcode_r r; + struct opcode_i i; + struct opcode_j j; + }; + u16 flags; + u16 offset; + struct opcode *next; +}; + +struct opcode * lightrec_disassemble(struct lightrec_state *state, + const u32 *src, unsigned int *len); +void lightrec_free_opcode_list(struct lightrec_state *state, + struct opcode *list); + +unsigned int lightrec_cycles_of_opcode(union code code); + +void lightrec_print_disassembly(const struct block *block, + const u32 *code, unsigned int length); + +#endif /* __DISASSEMBLER_H__ */ diff --git a/deps/lightrec/emitter.c b/deps/lightrec/emitter.c new file mode 100644 index 0000000..b09dc94 --- /dev/null +++ b/deps/lightrec/emitter.c @@ -0,0 +1,1577 @@ +/* + * Copyright 
(C) 2014-2020 Paul Cercueil <paul@crapouillou.net> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + */ + +#include "blockcache.h" +#include "debug.h" +#include "disassembler.h" +#include "emitter.h" +#include "optimizer.h" +#include "regcache.h" + +#include <lightning.h> +#include <stdbool.h> +#include <stddef.h> + +typedef void (*lightrec_rec_func_t)(const struct block *, + const struct opcode *, u32); + +/* Forward declarations */ +static void rec_SPECIAL(const struct block *block, + const struct opcode *op, u32 pc); +static void rec_REGIMM(const struct block *block, + const struct opcode *op, u32 pc); +static void rec_CP0(const struct block *block, const struct opcode *op, u32 pc); +static void rec_CP2(const struct block *block, const struct opcode *op, u32 pc); + + +static void unknown_opcode(const struct block *block, + const struct opcode *op, u32 pc) +{ + pr_warn("Unknown opcode: 0x%08x at PC 0x%08x\n", op->opcode, pc); +} + +static void lightrec_emit_end_of_block(const struct block *block, + const struct opcode *op, u32 pc, + s8 reg_new_pc, u32 imm, u8 ra_reg, + u32 link, bool update_cycles) +{ + struct lightrec_state *state = block->state; + struct regcache *reg_cache = state->reg_cache; + u32 cycles = state->cycles; + jit_state_t *_jit = block->_jit; + + jit_note(__FILE__, __LINE__); + + if (link) { + /* Update the $ra register */ + u8 link_reg = lightrec_alloc_reg_out(reg_cache, _jit, ra_reg); + jit_movi(link_reg, link); + lightrec_free_reg(reg_cache, link_reg); + } + + if 
(reg_new_pc < 0) { + reg_new_pc = lightrec_alloc_reg(reg_cache, _jit, JIT_V0); + lightrec_lock_reg(reg_cache, _jit, reg_new_pc); + + jit_movi(reg_new_pc, imm); + } + + if (has_delay_slot(op->c) && + !(op->flags & (LIGHTREC_NO_DS | LIGHTREC_LOCAL_BRANCH))) { + cycles += lightrec_cycles_of_opcode(op->next->c); + + /* Recompile the delay slot */ + if (op->next->c.opcode) + lightrec_rec_opcode(block, op->next, pc + 4); + } + + /* Store back remaining registers */ + lightrec_storeback_regs(reg_cache, _jit); + + jit_movr(JIT_V0, reg_new_pc); + + if (cycles && update_cycles) { + jit_subi(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, cycles); + pr_debug("EOB: %u cycles\n", cycles); + } + + if (op->next && ((op->flags & LIGHTREC_NO_DS) || op->next->next)) + state->branches[state->nb_branches++] = jit_jmpi(); +} + +void lightrec_emit_eob(const struct block *block, + const struct opcode *op, u32 pc) +{ + struct lightrec_state *state = block->state; + struct regcache *reg_cache = state->reg_cache; + jit_state_t *_jit = block->_jit; + + lightrec_storeback_regs(reg_cache, _jit); + + jit_movi(JIT_V0, pc); + jit_subi(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, + state->cycles - lightrec_cycles_of_opcode(op->c)); + + state->branches[state->nb_branches++] = jit_jmpi(); +} + +static void rec_special_JR(const struct block *block, + const struct opcode *op, u32 pc) +{ + struct regcache *reg_cache = block->state->reg_cache; + jit_state_t *_jit = block->_jit; + u8 rs = lightrec_request_reg_in(reg_cache, _jit, op->r.rs, JIT_V0); + + _jit_name(block->_jit, __func__); + lightrec_lock_reg(reg_cache, _jit, rs); + lightrec_emit_end_of_block(block, op, pc, rs, 0, 31, 0, true); +} + +static void rec_special_JALR(const struct block *block, + const struct opcode *op, u32 pc) +{ + struct regcache *reg_cache = block->state->reg_cache; + jit_state_t *_jit = block->_jit; + u8 rs = lightrec_request_reg_in(reg_cache, _jit, op->r.rs, JIT_V0); + + _jit_name(block->_jit, __func__); + lightrec_lock_reg(reg_cache, 
_jit, rs); + lightrec_emit_end_of_block(block, op, pc, rs, 0, op->r.rd, pc + 8, true); +} + +static void rec_J(const struct block *block, const struct opcode *op, u32 pc) +{ + _jit_name(block->_jit, __func__); + lightrec_emit_end_of_block(block, op, pc, -1, + (pc & 0xf0000000) | (op->j.imm << 2), 31, 0, true); +} + +static void rec_JAL(const struct block *block, const struct opcode *op, u32 pc) +{ + _jit_name(block->_jit, __func__); + lightrec_emit_end_of_block(block, op, pc, -1, + (pc & 0xf0000000) | (op->j.imm << 2), + 31, pc + 8, true); +} + +static void rec_b(const struct block *block, const struct opcode *op, u32 pc, + jit_code_t code, u32 link, bool unconditional, bool bz) +{ + struct regcache *reg_cache = block->state->reg_cache; + struct native_register *regs_backup; + jit_state_t *_jit = block->_jit; + struct lightrec_branch *branch; + jit_node_t *addr; + u8 link_reg; + u32 offset, cycles = block->state->cycles; + bool is_forward = (s16)op->i.imm >= -1; + + jit_note(__FILE__, __LINE__); + + if (!(op->flags & LIGHTREC_NO_DS)) + cycles += lightrec_cycles_of_opcode(op->next->c); + + block->state->cycles = 0; + + if (cycles) + jit_subi(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, cycles); + + if (!unconditional) { + u8 rs = lightrec_alloc_reg_in_ext(reg_cache, _jit, op->i.rs), + rt = bz ? 
0 : lightrec_alloc_reg_in_ext(reg_cache, + _jit, op->i.rt); + + /* Generate the branch opcode */ + addr = jit_new_node_pww(code, NULL, rs, rt); + + lightrec_free_regs(reg_cache); + regs_backup = lightrec_regcache_enter_branch(reg_cache); + } + + if (op->flags & LIGHTREC_LOCAL_BRANCH) { + if (op->next && !(op->flags & LIGHTREC_NO_DS)) { + /* Recompile the delay slot */ + if (op->next->opcode) + lightrec_rec_opcode(block, op->next, pc + 4); + } + + if (link) { + /* Update the $ra register */ + link_reg = lightrec_alloc_reg_out(reg_cache, _jit, 31); + jit_movi(link_reg, link); + lightrec_free_reg(reg_cache, link_reg); + } + + /* Store back remaining registers */ + lightrec_storeback_regs(reg_cache, _jit); + + offset = op->offset + 1 + (s16)op->i.imm; + pr_debug("Adding local branch to offset 0x%x\n", offset << 2); + branch = &block->state->local_branches[ + block->state->nb_local_branches++]; + + branch->target = offset; + if (is_forward) + branch->branch = jit_jmpi(); + else + branch->branch = jit_bgti(LIGHTREC_REG_CYCLE, 0); + } + + if (!(op->flags & LIGHTREC_LOCAL_BRANCH) || !is_forward) { + lightrec_emit_end_of_block(block, op, pc, -1, + pc + 4 + ((s16)op->i.imm << 2), + 31, link, false); + } + + if (!unconditional) { + jit_patch(addr); + lightrec_regcache_leave_branch(reg_cache, regs_backup); + + if (bz && link) { + /* Update the $ra register */ + link_reg = lightrec_alloc_reg_out_ext(reg_cache, + _jit, 31); + jit_movi(link_reg, (s32)link); + lightrec_free_reg(reg_cache, link_reg); + } + + if (!(op->flags & LIGHTREC_NO_DS) && op->next->opcode) + lightrec_rec_opcode(block, op->next, pc + 4); + } +} + +static void rec_BNE(const struct block *block, const struct opcode *op, u32 pc) +{ + _jit_name(block->_jit, __func__); + rec_b(block, op, pc, jit_code_beqr, 0, false, false); +} + +static void rec_BEQ(const struct block *block, const struct opcode *op, u32 pc) +{ + _jit_name(block->_jit, __func__); + rec_b(block, op, pc, jit_code_bner, 0, + op->i.rs == op->i.rt, 
false); +} + +static void rec_BLEZ(const struct block *block, const struct opcode *op, u32 pc) +{ + _jit_name(block->_jit, __func__); + rec_b(block, op, pc, jit_code_bgti, 0, op->i.rs == 0, true); +} + +static void rec_BGTZ(const struct block *block, const struct opcode *op, u32 pc) +{ + _jit_name(block->_jit, __func__); + rec_b(block, op, pc, jit_code_blei, 0, false, true); +} + +static void rec_regimm_BLTZ(const struct block *block, + const struct opcode *op, u32 pc) +{ + _jit_name(block->_jit, __func__); + rec_b(block, op, pc, jit_code_bgei, 0, false, true); +} + +static void rec_regimm_BLTZAL(const struct block *block, + const struct opcode *op, u32 pc) +{ + _jit_name(block->_jit, __func__); + rec_b(block, op, pc, jit_code_bgei, pc + 8, false, true); +} + +static void rec_regimm_BGEZ(const struct block *block, + const struct opcode *op, u32 pc) +{ + _jit_name(block->_jit, __func__); + rec_b(block, op, pc, jit_code_blti, 0, !op->i.rs, true); +} + +static void rec_regimm_BGEZAL(const struct block *block, + const struct opcode *op, u32 pc) +{ + _jit_name(block->_jit, __func__); + rec_b(block, op, pc, jit_code_blti, pc + 8, !op->i.rs, true); +} + +static void rec_alu_imm(const struct block *block, const struct opcode *op, + jit_code_t code, bool sign_extend) +{ + struct regcache *reg_cache = block->state->reg_cache; + jit_state_t *_jit = block->_jit; + u8 rs, rt; + + jit_note(__FILE__, __LINE__); + rs = lightrec_alloc_reg_in_ext(reg_cache, _jit, op->i.rs); + rt = lightrec_alloc_reg_out_ext(reg_cache, _jit, op->i.rt); + + if (sign_extend) + jit_new_node_www(code, rt, rs, (s32)(s16) op->i.imm); + else + jit_new_node_www(code, rt, rs, (u32)(u16) op->i.imm); + + lightrec_free_reg(reg_cache, rs); + lightrec_free_reg(reg_cache, rt); +} + +static void rec_alu_special(const struct block *block, const struct opcode *op, + jit_code_t code, bool out_ext) +{ + struct regcache *reg_cache = block->state->reg_cache; + jit_state_t *_jit = block->_jit; + u8 rd, rt, rs; + + 
jit_note(__FILE__, __LINE__); + rs = lightrec_alloc_reg_in_ext(reg_cache, _jit, op->r.rs); + rt = lightrec_alloc_reg_in_ext(reg_cache, _jit, op->r.rt); + + if (out_ext) + rd = lightrec_alloc_reg_out_ext(reg_cache, _jit, op->r.rd); + else + rd = lightrec_alloc_reg_out(reg_cache, _jit, op->r.rd); + + jit_new_node_www(code, rd, rs, rt); + + lightrec_free_reg(reg_cache, rs); + lightrec_free_reg(reg_cache, rt); + lightrec_free_reg(reg_cache, rd); +} + +static void rec_alu_shiftv(const struct block *block, + const struct opcode *op, jit_code_t code) +{ + struct regcache *reg_cache = block->state->reg_cache; + jit_state_t *_jit = block->_jit; + u8 rd, rt, rs, temp; + + jit_note(__FILE__, __LINE__); + rs = lightrec_alloc_reg_in(reg_cache, _jit, op->r.rs); + temp = lightrec_alloc_reg_temp(reg_cache, _jit); + + if (code == jit_code_rshr) { + rt = lightrec_alloc_reg_in_ext(reg_cache, _jit, op->r.rt); + rd = lightrec_alloc_reg_out_ext(reg_cache, _jit, op->r.rd); + } else { + rt = lightrec_alloc_reg_in(reg_cache, _jit, op->r.rt); + rd = lightrec_alloc_reg_out(reg_cache, _jit, op->r.rd); + } + + jit_andi(temp, rs, 0x1f); + +#if __WORDSIZE == 64 + if (code == jit_code_rshr_u) { + jit_extr_ui(rd, rt); + jit_new_node_www(code, rd, rd, temp); + } +#endif + + if (__WORDSIZE == 32 || code != jit_code_rshr_u) + jit_new_node_www(code, rd, rt, temp); + + lightrec_free_reg(reg_cache, rs); + lightrec_free_reg(reg_cache, temp); + lightrec_free_reg(reg_cache, rt); + lightrec_free_reg(reg_cache, rd); +} + +static void rec_ADDIU(const struct block *block, + const struct opcode *op, u32 pc) +{ + _jit_name(block->_jit, __func__); + rec_alu_imm(block, op, jit_code_addi, true); +} + +static void rec_ADDI(const struct block *block, const struct opcode *op, u32 pc) +{ + /* TODO: Handle the exception? 
*/ + _jit_name(block->_jit, __func__); + rec_alu_imm(block, op, jit_code_addi, true); +} + +static void rec_SLTIU(const struct block *block, + const struct opcode *op, u32 pc) +{ + _jit_name(block->_jit, __func__); + rec_alu_imm(block, op, jit_code_lti_u, true); +} + +static void rec_SLTI(const struct block *block, const struct opcode *op, u32 pc) +{ + _jit_name(block->_jit, __func__); + rec_alu_imm(block, op, jit_code_lti, true); +} + +static void rec_ANDI(const struct block *block, const struct opcode *op, u32 pc) +{ + struct regcache *reg_cache = block->state->reg_cache; + jit_state_t *_jit = block->_jit; + u8 rs, rt; + + _jit_name(block->_jit, __func__); + jit_note(__FILE__, __LINE__); + rs = lightrec_alloc_reg_in(reg_cache, _jit, op->i.rs); + rt = lightrec_alloc_reg_out_ext(reg_cache, _jit, op->i.rt); + + /* PSX code uses ANDI 0xff / ANDI 0xffff a lot, which are basically + * casts to uint8_t / uint16_t. */ + if (op->i.imm == 0xff) + jit_extr_uc(rt, rs); + else if (op->i.imm == 0xffff) + jit_extr_us(rt, rs); + else + jit_andi(rt, rs, (u32)(u16) op->i.imm); + + lightrec_free_reg(reg_cache, rs); + lightrec_free_reg(reg_cache, rt); +} + +static void rec_ORI(const struct block *block, const struct opcode *op, u32 pc) +{ + _jit_name(block->_jit, __func__); + rec_alu_imm(block, op, jit_code_ori, false); +} + +static void rec_XORI(const struct block *block, const struct opcode *op, u32 pc) +{ + _jit_name(block->_jit, __func__); + rec_alu_imm(block, op, jit_code_xori, false); +} + +static void rec_LUI(const struct block *block, const struct opcode *op, u32 pc) +{ + struct regcache *reg_cache = block->state->reg_cache; + jit_state_t *_jit = block->_jit; + u8 rt; + + jit_name(__func__); + jit_note(__FILE__, __LINE__); + rt = lightrec_alloc_reg_out_ext(reg_cache, _jit, op->i.rt); + + jit_movi(rt, (s32)(op->i.imm << 16)); + + lightrec_free_reg(reg_cache, rt); +} + +static void rec_special_ADDU(const struct block *block, + const struct opcode *op, u32 pc) +{ + 
_jit_name(block->_jit, __func__); + rec_alu_special(block, op, jit_code_addr, false); +} + +static void rec_special_ADD(const struct block *block, + const struct opcode *op, u32 pc) +{ + /* TODO: Handle the exception? */ + _jit_name(block->_jit, __func__); + rec_alu_special(block, op, jit_code_addr, false); +} + +static void rec_special_SUBU(const struct block *block, + const struct opcode *op, u32 pc) +{ + _jit_name(block->_jit, __func__); + rec_alu_special(block, op, jit_code_subr, false); +} + +static void rec_special_SUB(const struct block *block, + const struct opcode *op, u32 pc) +{ + /* TODO: Handle the exception? */ + _jit_name(block->_jit, __func__); + rec_alu_special(block, op, jit_code_subr, false); +} + +static void rec_special_AND(const struct block *block, + const struct opcode *op, u32 pc) +{ + _jit_name(block->_jit, __func__); + rec_alu_special(block, op, jit_code_andr, false); +} + +static void rec_special_OR(const struct block *block, + const struct opcode *op, u32 pc) +{ + _jit_name(block->_jit, __func__); + rec_alu_special(block, op, jit_code_orr, false); +} + +static void rec_special_XOR(const struct block *block, + const struct opcode *op, u32 pc) +{ + _jit_name(block->_jit, __func__); + rec_alu_special(block, op, jit_code_xorr, false); +} + +static void rec_special_NOR(const struct block *block, + const struct opcode *op, u32 pc) +{ + struct regcache *reg_cache = block->state->reg_cache; + jit_state_t *_jit = block->_jit; + u8 rd; + + jit_name(__func__); + rec_alu_special(block, op, jit_code_orr, false); + rd = lightrec_alloc_reg_out(reg_cache, _jit, op->r.rd); + + jit_comr(rd, rd); + + lightrec_free_reg(reg_cache, rd); +} + +static void rec_special_SLTU(const struct block *block, + const struct opcode *op, u32 pc) +{ + _jit_name(block->_jit, __func__); + rec_alu_special(block, op, jit_code_ltr_u, true); +} + +static void rec_special_SLT(const struct block *block, + const struct opcode *op, u32 pc) +{ + _jit_name(block->_jit, __func__); + 
rec_alu_special(block, op, jit_code_ltr, true); +} + +static void rec_special_SLLV(const struct block *block, + const struct opcode *op, u32 pc) +{ + _jit_name(block->_jit, __func__); + rec_alu_shiftv(block, op, jit_code_lshr); +} + +static void rec_special_SRLV(const struct block *block, + const struct opcode *op, u32 pc) +{ + _jit_name(block->_jit, __func__); + rec_alu_shiftv(block, op, jit_code_rshr_u); +} + +static void rec_special_SRAV(const struct block *block, + const struct opcode *op, u32 pc) +{ + _jit_name(block->_jit, __func__); + rec_alu_shiftv(block, op, jit_code_rshr); +} + +static void rec_alu_shift(const struct block *block, + const struct opcode *op, jit_code_t code) +{ + struct regcache *reg_cache = block->state->reg_cache; + jit_state_t *_jit = block->_jit; + u8 rd, rt; + + jit_note(__FILE__, __LINE__); + + if (code == jit_code_rshi) { + rt = lightrec_alloc_reg_in_ext(reg_cache, _jit, op->r.rt); + rd = lightrec_alloc_reg_out_ext(reg_cache, _jit, op->r.rd); + } else { + rt = lightrec_alloc_reg_in(reg_cache, _jit, op->r.rt); + rd = lightrec_alloc_reg_out(reg_cache, _jit, op->r.rd); + } + +#if __WORDSIZE == 64 + if (code == jit_code_rshi_u) { + jit_extr_ui(rd, rt); + jit_new_node_www(code, rd, rd, op->r.imm); + } +#endif + if (__WORDSIZE == 32 || code != jit_code_rshi_u) + jit_new_node_www(code, rd, rt, op->r.imm); + + lightrec_free_reg(reg_cache, rt); + lightrec_free_reg(reg_cache, rd); +} + +static void rec_special_SLL(const struct block *block, + const struct opcode *op, u32 pc) +{ + _jit_name(block->_jit, __func__); + rec_alu_shift(block, op, jit_code_lshi); +} + +static void rec_special_SRL(const struct block *block, + const struct opcode *op, u32 pc) +{ + _jit_name(block->_jit, __func__); + rec_alu_shift(block, op, jit_code_rshi_u); +} + +static void rec_special_SRA(const struct block *block, + const struct opcode *op, u32 pc) +{ + _jit_name(block->_jit, __func__); + rec_alu_shift(block, op, jit_code_rshi); +} + +static void rec_alu_mult(const 
struct block *block, + const struct opcode *op, bool is_signed) +{ + struct regcache *reg_cache = block->state->reg_cache; + jit_state_t *_jit = block->_jit; + u8 lo, hi, rs, rt; + + jit_note(__FILE__, __LINE__); + + lo = lightrec_alloc_reg_out(reg_cache, _jit, REG_LO); + if (!(op->flags & LIGHTREC_MULT32)) + hi = lightrec_alloc_reg_out_ext(reg_cache, _jit, REG_HI); + else if (__WORDSIZE == 64) + hi = lightrec_alloc_reg_temp(reg_cache, _jit); + + if (__WORDSIZE == 32 || !is_signed) { + rs = lightrec_alloc_reg_in(reg_cache, _jit, op->r.rs); + rt = lightrec_alloc_reg_in(reg_cache, _jit, op->r.rt); + } else { + rs = lightrec_alloc_reg_in_ext(reg_cache, _jit, op->r.rs); + rt = lightrec_alloc_reg_in_ext(reg_cache, _jit, op->r.rt); + } + +#if __WORDSIZE == 32 + /* On 32-bit systems, do a 32*32->64 bit operation, or a 32*32->32 bit + * operation if the MULT was detected a 32-bit only. */ + if (!(op->flags & LIGHTREC_MULT32)) { + if (is_signed) + jit_qmulr(lo, hi, rs, rt); + else + jit_qmulr_u(lo, hi, rs, rt); + } else { + jit_mulr(lo, rs, rt); + } +#else + /* On 64-bit systems, do a 64*64->64 bit operation. + * The input registers must be 32 bits, so we first sign-extend (if + * mult) or clear (if multu) the input registers. 
*/ + if (is_signed) { + jit_mulr(lo, rs, rt); + } else { + jit_extr_ui(lo, rt); + jit_extr_ui(hi, rs); + jit_mulr(lo, hi, lo); + } + + /* The 64-bit output value is in $lo, store the upper 32 bits in $hi */ + if (!(op->flags & LIGHTREC_MULT32)) + jit_rshi(hi, lo, 32); +#endif + + lightrec_free_reg(reg_cache, rs); + lightrec_free_reg(reg_cache, rt); + lightrec_free_reg(reg_cache, lo); + if (__WORDSIZE == 64 || !(op->flags & LIGHTREC_MULT32)) + lightrec_free_reg(reg_cache, hi); +} + +static void rec_alu_div(const struct block *block, + const struct opcode *op, bool is_signed) +{ + struct regcache *reg_cache = block->state->reg_cache; + jit_state_t *_jit = block->_jit; + jit_node_t *branch, *to_end; + u8 lo, hi, rs, rt; + + jit_note(__FILE__, __LINE__); + lo = lightrec_alloc_reg_out(reg_cache, _jit, REG_LO); + hi = lightrec_alloc_reg_out(reg_cache, _jit, REG_HI); + + if (__WORDSIZE == 32 || !is_signed) { + rs = lightrec_alloc_reg_in(reg_cache, _jit, op->r.rs); + rt = lightrec_alloc_reg_in(reg_cache, _jit, op->r.rt); + } else { + rs = lightrec_alloc_reg_in_ext(reg_cache, _jit, op->r.rs); + rt = lightrec_alloc_reg_in_ext(reg_cache, _jit, op->r.rt); + } + + /* Jump to special handler if dividing by zero */ + branch = jit_beqi(rt, 0); + +#if __WORDSIZE == 32 + if (is_signed) + jit_qdivr(lo, hi, rs, rt); + else + jit_qdivr_u(lo, hi, rs, rt); +#else + /* On 64-bit systems, the input registers must be 32 bits, so we first sign-extend + * (if div) or clear (if divu) the input registers. 
*/ + if (is_signed) { + jit_qdivr(lo, hi, rs, rt); + } else { + jit_extr_ui(lo, rt); + jit_extr_ui(hi, rs); + jit_qdivr_u(lo, hi, hi, lo); + } +#endif + + /* Jump above the div-by-zero handler */ + to_end = jit_jmpi(); + + jit_patch(branch); + + if (is_signed) { + jit_lti(lo, rs, 0); + jit_lshi(lo, lo, 1); + jit_subi(lo, lo, 1); + } else { + jit_movi(lo, 0xffffffff); + } + + jit_movr(hi, rs); + + jit_patch(to_end); + + lightrec_free_reg(reg_cache, rs); + lightrec_free_reg(reg_cache, rt); + lightrec_free_reg(reg_cache, lo); + lightrec_free_reg(reg_cache, hi); +} + +static void rec_special_MULT(const struct block *block, + const struct opcode *op, u32 pc) +{ + _jit_name(block->_jit, __func__); + rec_alu_mult(block, op, true); +} + +static void rec_special_MULTU(const struct block *block, + const struct opcode *op, u32 pc) +{ + _jit_name(block->_jit, __func__); + rec_alu_mult(block, op, false); +} + +static void rec_special_DIV(const struct block *block, + const struct opcode *op, u32 pc) +{ + _jit_name(block->_jit, __func__); + rec_alu_div(block, op, true); +} + +static void rec_special_DIVU(const struct block *block, + const struct opcode *op, u32 pc) +{ + _jit_name(block->_jit, __func__); + rec_alu_div(block, op, false); +} + +static void rec_alu_mv_lo_hi(const struct block *block, u8 dst, u8 src) +{ + struct regcache *reg_cache = block->state->reg_cache; + jit_state_t *_jit = block->_jit; + + jit_note(__FILE__, __LINE__); + src = lightrec_alloc_reg_in(reg_cache, _jit, src); + dst = lightrec_alloc_reg_out_ext(reg_cache, _jit, dst); + +#if __WORDSIZE == 32 + jit_movr(dst, src); +#else + jit_extr_i(dst, src); +#endif + + lightrec_free_reg(reg_cache, src); + lightrec_free_reg(reg_cache, dst); +} + +static void rec_special_MFHI(const struct block *block, + const struct opcode *op, u32 pc) +{ + _jit_name(block->_jit, __func__); + rec_alu_mv_lo_hi(block, op->r.rd, REG_HI); +} + +static void rec_special_MTHI(const struct block *block, + const struct opcode *op, u32 pc) +{ 
+ _jit_name(block->_jit, __func__); + rec_alu_mv_lo_hi(block, REG_HI, op->r.rs); +} + +static void rec_special_MFLO(const struct block *block, + const struct opcode *op, u32 pc) +{ + _jit_name(block->_jit, __func__); + rec_alu_mv_lo_hi(block, op->r.rd, REG_LO); +} + +static void rec_special_MTLO(const struct block *block, + const struct opcode *op, u32 pc) +{ + _jit_name(block->_jit, __func__); + rec_alu_mv_lo_hi(block, REG_LO, op->r.rs); +} + +static void rec_io(const struct block *block, const struct opcode *op, + bool load_rt, bool read_rt) +{ + struct regcache *reg_cache = block->state->reg_cache; + jit_state_t *_jit = block->_jit; + bool is_tagged = op->flags & (LIGHTREC_HW_IO | LIGHTREC_DIRECT_IO); + u32 offset; + u8 tmp, tmp2, tmp3; + + jit_note(__FILE__, __LINE__); + + tmp = lightrec_alloc_reg(reg_cache, _jit, JIT_R0); + + if (is_tagged) { + offset = offsetof(struct lightrec_state, rw_func); + } else { + tmp3 = lightrec_alloc_reg(reg_cache, _jit, JIT_R1); + offset = offsetof(struct lightrec_state, rw_generic_func); + } + + tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit); + jit_ldxi(tmp2, LIGHTREC_REG_STATE, offset); + + lightrec_clean_reg_if_loaded(reg_cache, _jit, op->i.rs, false); + + if (read_rt && likely(op->i.rt)) + lightrec_clean_reg_if_loaded(reg_cache, _jit, op->i.rt, true); + else if (load_rt) + lightrec_clean_reg_if_loaded(reg_cache, _jit, op->i.rt, false); + + if (is_tagged) { + jit_movi(tmp, op->opcode); + } else { + jit_movi(tmp, (uintptr_t)op); + jit_movi(tmp3, (uintptr_t)block); + } + + jit_callr(tmp2); + + lightrec_free_reg(reg_cache, tmp); + lightrec_free_reg(reg_cache, tmp2); + if (!is_tagged) + lightrec_free_reg(reg_cache, tmp3); + lightrec_regcache_mark_live(reg_cache, _jit); +} + +static void rec_store_direct_no_invalidate(const struct block *block, + const struct opcode *op, + jit_code_t code) +{ + struct lightrec_state *state = block->state; + struct regcache *reg_cache = state->reg_cache; + jit_state_t *_jit = block->_jit; + 
jit_node_t *to_not_ram, *to_end; + u8 tmp, tmp2, rs, rt; + s16 imm; + + jit_note(__FILE__, __LINE__); + rs = lightrec_alloc_reg_in(reg_cache, _jit, op->i.rs); + tmp = lightrec_alloc_reg_temp(reg_cache, _jit); + tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit); + + /* Convert to KUNSEG and avoid RAM mirrors */ + if (state->mirrors_mapped) { + imm = (s16)op->i.imm; + jit_andi(tmp, rs, 0x1f800000 | (4 * RAM_SIZE - 1)); + } else if (op->i.imm) { + imm = 0; + jit_addi(tmp, rs, (s16)op->i.imm); + jit_andi(tmp, tmp, 0x1f800000 | (RAM_SIZE - 1)); + } else { + imm = 0; + jit_andi(tmp, rs, 0x1f800000 | (RAM_SIZE - 1)); + } + + lightrec_free_reg(reg_cache, rs); + + if (state->offset_ram != state->offset_scratch) { + to_not_ram = jit_bmsi(tmp, BIT(28)); + + jit_movi(tmp2, state->offset_ram); + + to_end = jit_jmpi(); + jit_patch(to_not_ram); + + jit_movi(tmp2, state->offset_scratch); + jit_patch(to_end); + } else if (state->offset_ram) { + jit_movi(tmp2, state->offset_ram); + } + + if (state->offset_ram || state->offset_scratch) + jit_addr(tmp, tmp, tmp2); + + lightrec_free_reg(reg_cache, tmp2); + + rt = lightrec_alloc_reg_in(reg_cache, _jit, op->i.rt); + jit_new_node_www(code, imm, tmp, rt); + + lightrec_free_reg(reg_cache, rt); + lightrec_free_reg(reg_cache, tmp); +} + +static void rec_store_direct(const struct block *block, const struct opcode *op, + jit_code_t code) +{ + struct lightrec_state *state = block->state; + struct regcache *reg_cache = state->reg_cache; + jit_state_t *_jit = block->_jit; + jit_node_t *to_not_ram, *to_end; + u8 tmp, tmp2, tmp3, rs, rt; + + jit_note(__FILE__, __LINE__); + + rs = lightrec_alloc_reg_in(reg_cache, _jit, op->i.rs); + tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit); + tmp3 = lightrec_alloc_reg_in(reg_cache, _jit, 0); + + /* Convert to KUNSEG and avoid RAM mirrors */ + if (op->i.imm) { + jit_addi(tmp2, rs, (s16)op->i.imm); + jit_andi(tmp2, tmp2, 0x1f800000 | (RAM_SIZE - 1)); + } else { + jit_andi(tmp2, rs, 0x1f800000 | (RAM_SIZE - 1)); + } 
+ + lightrec_free_reg(reg_cache, rs); + tmp = lightrec_alloc_reg_temp(reg_cache, _jit); + + to_not_ram = jit_bgti(tmp2, RAM_SIZE); + + /* Compute the offset to the code LUT */ + jit_andi(tmp, tmp2, (RAM_SIZE - 1) & ~3); +#if __WORDSIZE == 64 + jit_lshi(tmp, tmp, 1); +#endif + jit_addr(tmp, LIGHTREC_REG_STATE, tmp); + + /* Write NULL to the code LUT to invalidate any block that's there */ + jit_stxi(offsetof(struct lightrec_state, code_lut), tmp, tmp3); + + if (state->offset_ram != state->offset_scratch) { + jit_movi(tmp, state->offset_ram); + + to_end = jit_jmpi(); + } + + jit_patch(to_not_ram); + + if (state->offset_ram || state->offset_scratch) + jit_movi(tmp, state->offset_scratch); + + if (state->offset_ram != state->offset_scratch) + jit_patch(to_end); + + if (state->offset_ram || state->offset_scratch) + jit_addr(tmp2, tmp2, tmp); + + lightrec_free_reg(reg_cache, tmp); + lightrec_free_reg(reg_cache, tmp3); + + rt = lightrec_alloc_reg_in(reg_cache, _jit, op->i.rt); + jit_new_node_www(code, 0, tmp2, rt); + + lightrec_free_reg(reg_cache, rt); + lightrec_free_reg(reg_cache, tmp2); +} + +static void rec_store(const struct block *block, const struct opcode *op, + jit_code_t code) +{ + if (op->flags & LIGHTREC_NO_INVALIDATE) { + rec_store_direct_no_invalidate(block, op, code); + } else if (op->flags & LIGHTREC_DIRECT_IO) { + if (block->state->invalidate_from_dma_only) + rec_store_direct_no_invalidate(block, op, code); + else + rec_store_direct(block, op, code); + } else { + rec_io(block, op, true, false); + } +} + +static void rec_SB(const struct block *block, const struct opcode *op, u32 pc) +{ + _jit_name(block->_jit, __func__); + rec_store(block, op, jit_code_stxi_c); +} + +static void rec_SH(const struct block *block, const struct opcode *op, u32 pc) +{ + _jit_name(block->_jit, __func__); + rec_store(block, op, jit_code_stxi_s); +} + +static void rec_SW(const struct block *block, const struct opcode *op, u32 pc) +{ + _jit_name(block->_jit, __func__); + 
rec_store(block, op, jit_code_stxi_i); +} + +static void rec_SWL(const struct block *block, const struct opcode *op, u32 pc) +{ + _jit_name(block->_jit, __func__); + rec_io(block, op, true, false); +} + +static void rec_SWR(const struct block *block, const struct opcode *op, u32 pc) +{ + _jit_name(block->_jit, __func__); + rec_io(block, op, true, false); +} + +static void rec_SWC2(const struct block *block, const struct opcode *op, u32 pc) +{ + _jit_name(block->_jit, __func__); + rec_io(block, op, false, false); +} + +static void rec_load_direct(const struct block *block, const struct opcode *op, + jit_code_t code) +{ + struct lightrec_state *state = block->state; + struct regcache *reg_cache = state->reg_cache; + jit_state_t *_jit = block->_jit; + jit_node_t *to_not_ram, *to_not_bios, *to_end, *to_end2; + u8 tmp, rs, rt, addr_reg; + s16 imm; + + if (!op->i.rt) + return; + + jit_note(__FILE__, __LINE__); + rs = lightrec_alloc_reg_in(reg_cache, _jit, op->i.rs); + rt = lightrec_alloc_reg_out_ext(reg_cache, _jit, op->i.rt); + + if ((state->offset_ram == state->offset_bios && + state->offset_ram == state->offset_scratch && + state->mirrors_mapped) || !op->i.imm) { + addr_reg = rs; + imm = (s16)op->i.imm; + } else { + jit_addi(rt, rs, (s16)op->i.imm); + addr_reg = rt; + imm = 0; + + if (op->i.rs != op->i.rt) + lightrec_free_reg(reg_cache, rs); + } + + tmp = lightrec_alloc_reg_temp(reg_cache, _jit); + + if (state->offset_ram == state->offset_bios && + state->offset_ram == state->offset_scratch) { + if (!state->mirrors_mapped) { + jit_andi(tmp, addr_reg, BIT(28)); + jit_rshi_u(tmp, tmp, 28 - 22); + jit_ori(tmp, tmp, 0x1f800000 | (RAM_SIZE - 1)); + jit_andr(rt, addr_reg, tmp); + } else { + jit_andi(rt, addr_reg, 0x1fffffff); + } + + if (state->offset_ram) + jit_movi(tmp, state->offset_ram); + } else { + to_not_ram = jit_bmsi(addr_reg, BIT(28)); + + /* Convert to KUNSEG and avoid RAM mirrors */ + jit_andi(rt, addr_reg, RAM_SIZE - 1); + + if (state->offset_ram) + 
jit_movi(tmp, state->offset_ram); + + to_end = jit_jmpi(); + + jit_patch(to_not_ram); + + if (state->offset_bios != state->offset_scratch) + to_not_bios = jit_bmci(addr_reg, BIT(22)); + + /* Convert to KUNSEG */ + jit_andi(rt, addr_reg, 0x1fc00000 | (BIOS_SIZE - 1)); + + jit_movi(tmp, state->offset_bios); + + if (state->offset_bios != state->offset_scratch) { + to_end2 = jit_jmpi(); + + jit_patch(to_not_bios); + + /* Convert to KUNSEG */ + jit_andi(rt, addr_reg, 0x1f800fff); + + if (state->offset_scratch) + jit_movi(tmp, state->offset_scratch); + + jit_patch(to_end2); + } + + jit_patch(to_end); + } + + if (state->offset_ram || state->offset_bios || state->offset_scratch) + jit_addr(rt, rt, tmp); + + jit_new_node_www(code, rt, rt, imm); + + lightrec_free_reg(reg_cache, addr_reg); + lightrec_free_reg(reg_cache, rt); + lightrec_free_reg(reg_cache, tmp); +} + +static void rec_load(const struct block *block, const struct opcode *op, + jit_code_t code) +{ + if (op->flags & LIGHTREC_DIRECT_IO) + rec_load_direct(block, op, code); + else + rec_io(block, op, false, true); +} + +static void rec_LB(const struct block *block, const struct opcode *op, u32 pc) +{ + _jit_name(block->_jit, __func__); + rec_load(block, op, jit_code_ldxi_c); +} + +static void rec_LBU(const struct block *block, const struct opcode *op, u32 pc) +{ + _jit_name(block->_jit, __func__); + rec_load(block, op, jit_code_ldxi_uc); +} + +static void rec_LH(const struct block *block, const struct opcode *op, u32 pc) +{ + _jit_name(block->_jit, __func__); + rec_load(block, op, jit_code_ldxi_s); +} + +static void rec_LHU(const struct block *block, const struct opcode *op, u32 pc) +{ + _jit_name(block->_jit, __func__); + rec_load(block, op, jit_code_ldxi_us); +} + +static void rec_LWL(const struct block *block, const struct opcode *op, u32 pc) +{ + _jit_name(block->_jit, __func__); + rec_io(block, op, true, true); +} + +static void rec_LWR(const struct block *block, const struct opcode *op, u32 pc) +{ + 
_jit_name(block->_jit, __func__); + rec_io(block, op, true, true); +} + +static void rec_LW(const struct block *block, const struct opcode *op, u32 pc) +{ + _jit_name(block->_jit, __func__); + rec_load(block, op, jit_code_ldxi_i); +} + +static void rec_LWC2(const struct block *block, const struct opcode *op, u32 pc) +{ + _jit_name(block->_jit, __func__); + rec_io(block, op, false, false); +} + +static void rec_break_syscall(const struct block *block, + const struct opcode *op, u32 pc, bool is_break) +{ + struct regcache *reg_cache = block->state->reg_cache; + jit_state_t *_jit = block->_jit; + u32 offset; + u8 tmp; + + jit_note(__FILE__, __LINE__); + + if (is_break) + offset = offsetof(struct lightrec_state, break_func); + else + offset = offsetof(struct lightrec_state, syscall_func); + + tmp = lightrec_alloc_reg_temp(reg_cache, _jit); + jit_ldxi(tmp, LIGHTREC_REG_STATE, offset); + jit_callr(tmp); + lightrec_free_reg(reg_cache, tmp); + + lightrec_regcache_mark_live(reg_cache, _jit); + + /* TODO: the return address should be "pc - 4" if we're a delay slot */ + lightrec_emit_end_of_block(block, op, pc, -1, pc, 31, 0, true); +} + +static void rec_special_SYSCALL(const struct block *block, + const struct opcode *op, u32 pc) +{ + _jit_name(block->_jit, __func__); + rec_break_syscall(block, op, pc, false); +} + +static void rec_special_BREAK(const struct block *block, + const struct opcode *op, u32 pc) +{ + _jit_name(block->_jit, __func__); + rec_break_syscall(block, op, pc, true); +} + +static void rec_mfc(const struct block *block, const struct opcode *op) +{ + u8 tmp, tmp2; + struct lightrec_state *state = block->state; + struct regcache *reg_cache = state->reg_cache; + jit_state_t *_jit = block->_jit; + + jit_note(__FILE__, __LINE__); + + tmp = lightrec_alloc_reg(reg_cache, _jit, JIT_R0); + tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit); + + jit_ldxi(tmp2, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, mfc_func)); + + lightrec_clean_reg_if_loaded(reg_cache, 
_jit, op->i.rt, true); + + jit_movi(tmp, op->opcode); + jit_callr(tmp2); + lightrec_free_reg(reg_cache, tmp); + lightrec_free_reg(reg_cache, tmp2); + + lightrec_regcache_mark_live(reg_cache, _jit); +} + +static void rec_mtc(const struct block *block, const struct opcode *op, u32 pc) +{ + struct lightrec_state *state = block->state; + struct regcache *reg_cache = state->reg_cache; + jit_state_t *_jit = block->_jit; + u8 tmp, tmp2; + + jit_note(__FILE__, __LINE__); + + tmp = lightrec_alloc_reg(reg_cache, _jit, JIT_R0); + tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit); + jit_ldxi(tmp2, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, mtc_func)); + + lightrec_clean_reg_if_loaded(reg_cache, _jit, op->i.rs, false); + lightrec_clean_reg_if_loaded(reg_cache, _jit, op->i.rt, false); + + jit_movi(tmp, op->opcode); + jit_callr(tmp2); + lightrec_free_reg(reg_cache, tmp); + lightrec_free_reg(reg_cache, tmp2); + + lightrec_regcache_mark_live(reg_cache, _jit); + + if (op->i.op == OP_CP0 && (op->r.rd == 12 || op->r.rd == 13)) + lightrec_emit_end_of_block(block, op, pc, -1, pc + 4, 0, 0, true); +} + +static void rec_cp0_MFC0(const struct block *block, + const struct opcode *op, u32 pc) +{ + _jit_name(block->_jit, __func__); + rec_mfc(block, op); +} + +static void rec_cp0_CFC0(const struct block *block, + const struct opcode *op, u32 pc) +{ + _jit_name(block->_jit, __func__); + rec_mfc(block, op); +} + +static void rec_cp0_MTC0(const struct block *block, + const struct opcode *op, u32 pc) +{ + _jit_name(block->_jit, __func__); + rec_mtc(block, op, pc); +} + +static void rec_cp0_CTC0(const struct block *block, + const struct opcode *op, u32 pc) +{ + _jit_name(block->_jit, __func__); + rec_mtc(block, op, pc); +} + +static void rec_cp2_basic_MFC2(const struct block *block, + const struct opcode *op, u32 pc) +{ + _jit_name(block->_jit, __func__); + rec_mfc(block, op); +} + +static void rec_cp2_basic_CFC2(const struct block *block, + const struct opcode *op, u32 pc) +{ + 
_jit_name(block->_jit, __func__); + rec_mfc(block, op); +} + +static void rec_cp2_basic_MTC2(const struct block *block, + const struct opcode *op, u32 pc) +{ + _jit_name(block->_jit, __func__); + rec_mtc(block, op, pc); +} + +static void rec_cp2_basic_CTC2(const struct block *block, + const struct opcode *op, u32 pc) +{ + _jit_name(block->_jit, __func__); + rec_mtc(block, op, pc); +} + +static void rec_cp0_RFE(const struct block *block, + const struct opcode *op, u32 pc) +{ + struct lightrec_state *state = block->state; + jit_state_t *_jit = block->_jit; + u8 tmp; + + jit_name(__func__); + jit_note(__FILE__, __LINE__); + + tmp = lightrec_alloc_reg_temp(state->reg_cache, _jit); + jit_ldxi(tmp, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, rfe_func)); + jit_callr(tmp); + lightrec_free_reg(state->reg_cache, tmp); + + lightrec_regcache_mark_live(state->reg_cache, _jit); +} + +static void rec_CP(const struct block *block, const struct opcode *op, u32 pc) +{ + struct regcache *reg_cache = block->state->reg_cache; + jit_state_t *_jit = block->_jit; + u8 tmp, tmp2; + + jit_name(__func__); + jit_note(__FILE__, __LINE__); + + tmp = lightrec_alloc_reg(reg_cache, _jit, JIT_R0); + tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit); + + jit_ldxi(tmp2, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, cp_func)); + + jit_movi(tmp, op->opcode); + jit_callr(tmp2); + lightrec_free_reg(reg_cache, tmp); + lightrec_free_reg(reg_cache, tmp2); + + lightrec_regcache_mark_live(reg_cache, _jit); +} + +static void rec_meta_unload(const struct block *block, + const struct opcode *op, u32 pc) +{ + struct lightrec_state *state = block->state; + struct regcache *reg_cache = state->reg_cache; + jit_state_t *_jit = block->_jit; + + jit_name(__func__); + jit_note(__FILE__, __LINE__); + + pr_debug("Unloading reg %s\n", lightrec_reg_name(op->i.rs)); + lightrec_clean_reg_if_loaded(reg_cache, _jit, op->i.rs, true); +} + +static void rec_meta_BEQZ(const struct block *block, + const struct opcode 
*op, u32 pc) +{ + _jit_name(block->_jit, __func__); + rec_b(block, op, pc, jit_code_bnei, 0, false, true); +} + +static void rec_meta_BNEZ(const struct block *block, + const struct opcode *op, u32 pc) +{ + _jit_name(block->_jit, __func__); + rec_b(block, op, pc, jit_code_beqi, 0, false, true); +} + +static void rec_meta_MOV(const struct block *block, + const struct opcode *op, u32 pc) +{ + struct lightrec_state *state = block->state; + struct regcache *reg_cache = state->reg_cache; + jit_state_t *_jit = block->_jit; + u8 rs, rd; + + _jit_name(block->_jit, __func__); + jit_note(__FILE__, __LINE__); + rs = op->r.rs ? lightrec_alloc_reg_in(reg_cache, _jit, op->r.rs) : 0; + rd = lightrec_alloc_reg_out_ext(reg_cache, _jit, op->r.rd); + + if (op->r.rs == 0) { + jit_movi(rd, 0); + } else { +#if __WORDSIZE == 32 + jit_movr(rd, rs); +#else + jit_extr_i(rd, rs); +#endif + } + + lightrec_free_reg(state->reg_cache, rs); + lightrec_free_reg(state->reg_cache, rd); +} + +static void rec_meta_sync(const struct block *block, + const struct opcode *op, u32 pc) +{ + struct lightrec_state *state = block->state; + struct lightrec_branch_target *target; + jit_state_t *_jit = block->_jit; + + jit_name(__func__); + jit_note(__FILE__, __LINE__); + + jit_subi(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, state->cycles); + state->cycles = 0; + + lightrec_storeback_regs(state->reg_cache, _jit); + lightrec_regcache_reset(state->reg_cache); + + pr_debug("Adding branch target at offset 0x%x\n", + op->offset << 2); + target = &state->targets[state->nb_targets++]; + target->offset = op->offset; + target->label = jit_label(); +} + +static const lightrec_rec_func_t rec_standard[64] = { + [OP_SPECIAL] = rec_SPECIAL, + [OP_REGIMM] = rec_REGIMM, + [OP_J] = rec_J, + [OP_JAL] = rec_JAL, + [OP_BEQ] = rec_BEQ, + [OP_BNE] = rec_BNE, + [OP_BLEZ] = rec_BLEZ, + [OP_BGTZ] = rec_BGTZ, + [OP_ADDI] = rec_ADDI, + [OP_ADDIU] = rec_ADDIU, + [OP_SLTI] = rec_SLTI, + [OP_SLTIU] = rec_SLTIU, + [OP_ANDI] = rec_ANDI, + [OP_ORI] 
= rec_ORI, + [OP_XORI] = rec_XORI, + [OP_LUI] = rec_LUI, + [OP_CP0] = rec_CP0, + [OP_CP2] = rec_CP2, + [OP_LB] = rec_LB, + [OP_LH] = rec_LH, + [OP_LWL] = rec_LWL, + [OP_LW] = rec_LW, + [OP_LBU] = rec_LBU, + [OP_LHU] = rec_LHU, + [OP_LWR] = rec_LWR, + [OP_SB] = rec_SB, + [OP_SH] = rec_SH, + [OP_SWL] = rec_SWL, + [OP_SW] = rec_SW, + [OP_SWR] = rec_SWR, + [OP_LWC2] = rec_LWC2, + [OP_SWC2] = rec_SWC2, + + [OP_META_REG_UNLOAD] = rec_meta_unload, + [OP_META_BEQZ] = rec_meta_BEQZ, + [OP_META_BNEZ] = rec_meta_BNEZ, + [OP_META_MOV] = rec_meta_MOV, + [OP_META_SYNC] = rec_meta_sync, +}; + +static const lightrec_rec_func_t rec_special[64] = { + [OP_SPECIAL_SLL] = rec_special_SLL, + [OP_SPECIAL_SRL] = rec_special_SRL, + [OP_SPECIAL_SRA] = rec_special_SRA, + [OP_SPECIAL_SLLV] = rec_special_SLLV, + [OP_SPECIAL_SRLV] = rec_special_SRLV, + [OP_SPECIAL_SRAV] = rec_special_SRAV, + [OP_SPECIAL_JR] = rec_special_JR, + [OP_SPECIAL_JALR] = rec_special_JALR, + [OP_SPECIAL_SYSCALL] = rec_special_SYSCALL, + [OP_SPECIAL_BREAK] = rec_special_BREAK, + [OP_SPECIAL_MFHI] = rec_special_MFHI, + [OP_SPECIAL_MTHI] = rec_special_MTHI, + [OP_SPECIAL_MFLO] = rec_special_MFLO, + [OP_SPECIAL_MTLO] = rec_special_MTLO, + [OP_SPECIAL_MULT] = rec_special_MULT, + [OP_SPECIAL_MULTU] = rec_special_MULTU, + [OP_SPECIAL_DIV] = rec_special_DIV, + [OP_SPECIAL_DIVU] = rec_special_DIVU, + [OP_SPECIAL_ADD] = rec_special_ADD, + [OP_SPECIAL_ADDU] = rec_special_ADDU, + [OP_SPECIAL_SUB] = rec_special_SUB, + [OP_SPECIAL_SUBU] = rec_special_SUBU, + [OP_SPECIAL_AND] = rec_special_AND, + [OP_SPECIAL_OR] = rec_special_OR, + [OP_SPECIAL_XOR] = rec_special_XOR, + [OP_SPECIAL_NOR] = rec_special_NOR, + [OP_SPECIAL_SLT] = rec_special_SLT, + [OP_SPECIAL_SLTU] = rec_special_SLTU, +}; + +static const lightrec_rec_func_t rec_regimm[64] = { + [OP_REGIMM_BLTZ] = rec_regimm_BLTZ, + [OP_REGIMM_BGEZ] = rec_regimm_BGEZ, + [OP_REGIMM_BLTZAL] = rec_regimm_BLTZAL, + [OP_REGIMM_BGEZAL] = rec_regimm_BGEZAL, +}; + +static const lightrec_rec_func_t 
rec_cp0[64] = { + [OP_CP0_MFC0] = rec_cp0_MFC0, + [OP_CP0_CFC0] = rec_cp0_CFC0, + [OP_CP0_MTC0] = rec_cp0_MTC0, + [OP_CP0_CTC0] = rec_cp0_CTC0, + [OP_CP0_RFE] = rec_cp0_RFE, +}; + +static const lightrec_rec_func_t rec_cp2_basic[64] = { + [OP_CP2_BASIC_MFC2] = rec_cp2_basic_MFC2, + [OP_CP2_BASIC_CFC2] = rec_cp2_basic_CFC2, + [OP_CP2_BASIC_MTC2] = rec_cp2_basic_MTC2, + [OP_CP2_BASIC_CTC2] = rec_cp2_basic_CTC2, +}; + +static void rec_SPECIAL(const struct block *block, + const struct opcode *op, u32 pc) +{ + lightrec_rec_func_t f = rec_special[op->r.op]; + if (likely(f)) + (*f)(block, op, pc); + else + unknown_opcode(block, op, pc); +} + +static void rec_REGIMM(const struct block *block, + const struct opcode *op, u32 pc) +{ + lightrec_rec_func_t f = rec_regimm[op->r.rt]; + if (likely(f)) + (*f)(block, op, pc); + else + unknown_opcode(block, op, pc); +} + +static void rec_CP0(const struct block *block, const struct opcode *op, u32 pc) +{ + lightrec_rec_func_t f = rec_cp0[op->r.rs]; + if (likely(f)) + (*f)(block, op, pc); + else + rec_CP(block, op, pc); +} + +static void rec_CP2(const struct block *block, const struct opcode *op, u32 pc) +{ + if (op->r.op == OP_CP2_BASIC) { + lightrec_rec_func_t f = rec_cp2_basic[op->r.rs]; + if (likely(f)) { + (*f)(block, op, pc); + return; + } + } + + rec_CP(block, op, pc); +} + +void lightrec_rec_opcode(const struct block *block, + const struct opcode *op, u32 pc) +{ + lightrec_rec_func_t f = rec_standard[op->i.op]; + if (likely(f)) + (*f)(block, op, pc); + else + unknown_opcode(block, op, pc); +} diff --git a/deps/lightrec/emitter.h b/deps/lightrec/emitter.h new file mode 100644 index 0000000..57ededf --- /dev/null +++ b/deps/lightrec/emitter.h @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2014 Paul Cercueil <paul@crapouillou.net> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 
2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + */ + +#ifndef __EMITTER_H__ +#define __EMITTER_H__ + +#include "lightrec.h" + +struct block; +struct opcode; + +void lightrec_rec_opcode(const struct block *block, + const struct opcode *op, u32 pc); +void lightrec_emit_eob(const struct block *block, + const struct opcode *op, u32 pc); + +#endif /* __EMITTER_H__ */ diff --git a/deps/lightrec/interpreter.c b/deps/lightrec/interpreter.c new file mode 100644 index 0000000..acc41ea --- /dev/null +++ b/deps/lightrec/interpreter.c @@ -0,0 +1,1124 @@ +/* + * Copyright (C) 2019-2020 Paul Cercueil <paul@crapouillou.net> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ */ + +#include "disassembler.h" +#include "interpreter.h" +#include "lightrec-private.h" +#include "optimizer.h" +#include "regcache.h" + +#include <stdbool.h> + +struct interpreter; + +static u32 int_CP0(struct interpreter *inter); +static u32 int_CP2(struct interpreter *inter); +static u32 int_SPECIAL(struct interpreter *inter); +static u32 int_REGIMM(struct interpreter *inter); +static u32 int_branch(struct interpreter *inter, u32 pc, + union code code, bool branch); + +typedef u32 (*lightrec_int_func_t)(struct interpreter *inter); + +static const lightrec_int_func_t int_standard[64]; + +struct interpreter { + struct lightrec_state *state; + struct block *block; + struct opcode *op; + u32 cycles; + bool delay_slot; +}; + +static inline u32 execute(lightrec_int_func_t func, struct interpreter *inter) +{ + return (*func)(inter); +} + +static inline u32 jump_skip(struct interpreter *inter) +{ + inter->op = inter->op->next; + + return execute(int_standard[inter->op->i.op], inter); +} + +static inline u32 jump_next(struct interpreter *inter) +{ + inter->cycles += lightrec_cycles_of_opcode(inter->op->c); + + if (unlikely(inter->delay_slot)) + return 0; + + return jump_skip(inter); +} + +static inline u32 jump_after_branch(struct interpreter *inter) +{ + inter->cycles += lightrec_cycles_of_opcode(inter->op->c); + + if (unlikely(inter->delay_slot)) + return 0; + + inter->op = inter->op->next; + + return jump_skip(inter); +} + +static void update_cycles_before_branch(struct interpreter *inter) +{ + u32 cycles; + + if (!inter->delay_slot) { + cycles = lightrec_cycles_of_opcode(inter->op->c); + + if (has_delay_slot(inter->op->c) && + !(inter->op->flags & LIGHTREC_NO_DS)) + cycles += lightrec_cycles_of_opcode(inter->op->next->c); + + inter->cycles += cycles; + inter->state->current_cycle += inter->cycles; + inter->cycles = -cycles; + } +} + +static bool is_branch_taken(const u32 *reg_cache, union code op) +{ + switch (op.i.op) { + case OP_SPECIAL: + return op.r.op == 
/*
 * Emulate the delay slot of a branch/jump opcode, handling all the nasty
 * corner cases: a load in the delay slot (MIPS load delay), a branch in the
 * delay slot of another branch, and the "impossible branch" where the target
 * of a branch is itself a branch.
 *
 * inter:  interpreter context of the branch opcode itself
 * pc:     target address of the branch
 * branch: true if the branch is taken
 * Returns the address of the next block to execute.
 */
static u32 int_delay_slot(struct interpreter *inter, u32 pc, bool branch)
{
	struct lightrec_state *state = inter->state;
	u32 *reg_cache = state->native_reg_cache;
	/* op points to the delay slot opcode (next in the block's list) */
	struct opcode new_op, *op = inter->op->next;
	union code op_next;
	/* Secondary interpreter context used to run single opcodes out of
	 * line (delay slot opcode, or first opcode of the target block). */
	struct interpreter inter2 = {
		.state = state,
		.cycles = inter->cycles,
		.delay_slot = true,
		.block = NULL,
	};
	bool run_first_op = false, dummy_ld = false, save_rs = false,
	     load_in_ds, branch_in_ds = false, branch_at_addr = false,
	     branch_taken;
	u32 old_rs, new_rs, new_rt;
	u32 next_pc, ds_next_pc;
	u32 cause, epc;

	if (op->i.op == OP_CP0 && op->r.rs == OP_CP0_RFE) {
		/* When an IRQ happens, the PSX exception handlers (when done)
		 * will jump back to the instruction that was executed right
		 * before the IRQ, unless it was a GTE opcode; in that case, it
		 * jumps to the instruction right after.
		 * Since we will never handle the IRQ right after a GTE opcode,
		 * but on branch boundaries, we need to adjust the return
		 * address so that the GTE opcode is effectively executed.
		 */
		cause = (*state->ops.cop0_ops.cfc)(state, 13);
		epc = (*state->ops.cop0_ops.cfc)(state, 14);

		/* cause & 0x7c == 0 means no exception code is pending;
		 * only then rewind PC onto the GTE opcode. */
		if (!(cause & 0x7c) && epc == pc - 4)
			pc -= 4;
	}

	if (inter->delay_slot) {
		/* The branch opcode was in a delay slot of another branch
		 * opcode. Just return the target address of the second
		 * branch. */
		return pc;
	}

	/* An opcode located in the delay slot performing a delayed read
	 * requires special handling; we will always resort to using the
	 * interpreter in that case.
	 * Same goes for when we have a branch in a delay slot of another
	 * branch. */
	load_in_ds = load_in_delay_slot(op->c);
	branch_in_ds = has_delay_slot(op->c);

	if (branch) {
		if (load_in_ds || branch_in_ds)
			op_next = lightrec_read_opcode(state, pc);

		if (load_in_ds) {
			/* Verify that the next block actually reads the
			 * destination register of the delay slot opcode. */
			run_first_op = opcode_reads_register(op_next, op->r.rt);
		}

		if (branch_in_ds) {
			run_first_op = true;
			next_pc = pc + 4;
		}

		if (load_in_ds && run_first_op) {
			next_pc = pc + 4;

			/* If the first opcode of the next block writes the
			 * regiser used as the address for the load, we need to
			 * reset to the old value after it has been executed,
			 * then restore the new value after the delay slot
			 * opcode has been executed. */
			save_rs = opcode_reads_register(op->c, op->r.rs) &&
				  opcode_writes_register(op_next, op->r.rs);
			if (save_rs)
				old_rs = reg_cache[op->r.rs];

			/* If both the first opcode of the next block and the
			 * delay slot opcode write to the same register, the
			 * value written by the delay slot opcode is
			 * discarded. */
			dummy_ld = opcode_writes_register(op_next, op->r.rt);
		}

		if (!run_first_op) {
			next_pc = pc;
		} else if (has_delay_slot(op_next)) {
			/* The first opcode of the next block is a branch, so we
			 * cannot execute it here, because of the load delay.
			 * Just check whether or not the branch would be taken,
			 * and save that info into the interpreter struct. */
			branch_at_addr = true;
			branch_taken = is_branch_taken(reg_cache, op_next);
			pr_debug("Target of impossible branch is a branch, "
				 "%staken.\n", branch_taken ? "" : "not ");
		} else {
			new_op.c = op_next;
			new_op.flags = 0;
			new_op.offset = 0;
			new_op.next = NULL;
			inter2.op = &new_op;

			/* Execute the first opcode of the next block */
			(*int_standard[inter2.op->i.op])(&inter2);

			if (save_rs) {
				new_rs = reg_cache[op->r.rs];
				reg_cache[op->r.rs] = old_rs;
			}

			inter->cycles += lightrec_cycles_of_opcode(op_next);
		}
	} else {
		/* Branch not taken: the next block starts right after the
		 * delay slot. */
		next_pc = inter->block->pc
			+ (inter->op->offset + 2) * sizeof(u32);
	}

	inter2.block = inter->block;
	inter2.op = op;
	inter2.cycles = inter->cycles;

	if (dummy_ld)
		new_rt = reg_cache[op->r.rt];

	/* Execute delay slot opcode */
	if (branch_at_addr)
		ds_next_pc = int_branch(&inter2, pc, op_next, branch_taken);
	else
		ds_next_pc = (*int_standard[inter2.op->i.op])(&inter2);

	if (branch_at_addr && !branch_taken) {
		/* If the branch at the target of the branch opcode is not
		 * taken, we jump to its delay slot */
		next_pc = pc + sizeof(u32);
	} else if (!branch && branch_in_ds) {
		next_pc = ds_next_pc;
	}

	if (save_rs)
		reg_cache[op->r.rs] = new_rs;
	if (dummy_ld)
		reg_cache[op->r.rt] = new_rt;

	inter->cycles += lightrec_cycles_of_opcode(op->c);

	if (branch_at_addr && branch_taken) {
		/* If the branch at the target of the branch opcode is taken,
		 * we execute its delay slot here, and jump to its target
		 * address. */
		op_next = lightrec_read_opcode(state, pc + 4);

		new_op.c = op_next;
		new_op.flags = 0;
		new_op.offset = sizeof(u32);
		new_op.next = NULL;
		inter2.op = &new_op;
		inter2.block = NULL;

		inter->cycles += lightrec_cycles_of_opcode(op_next);

		pr_debug("Running delay slot of branch at target of impossible "
			 "branch\n");
		(*int_standard[inter2.op->i.op])(&inter2);
	}

	return next_pc;
}

/* Fallback for opcodes without a handler: warn and skip to the next one. */
static u32 int_unimplemented(struct interpreter *inter)
{
	pr_warn("Unimplemented opcode 0x%08x\n", inter->op->opcode);

	return jump_next(inter);
}

/*
 * J/JAL: compute the absolute jump target (26-bit immediate within the
 * current 256 MiB segment); JAL additionally writes the return address
 * (address of the jump + 8) into $ra (r31).
 */
static u32 int_jump(struct interpreter *inter, bool link)
{
	struct lightrec_state *state = inter->state;
	u32 old_pc = inter->block->pc + inter->op->offset * sizeof(u32);
	u32 pc = (old_pc & 0xf0000000) | (inter->op->j.imm << 2);

	if (link)
		state->native_reg_cache[31] = old_pc + 8;

	/* LIGHTREC_NO_DS: the optimizer already handled the delay slot */
	if (inter->op->flags & LIGHTREC_NO_DS)
		return pc;

	return int_delay_slot(inter, pc, true);
}

static u32 int_J(struct interpreter *inter)
{
	return int_jump(inter, false);
}

static u32 int_JAL(struct interpreter *inter)
{
	return int_jump(inter, true);
}

/*
 * JR/JALR: jump to the address held in rs; JALR writes the return address
 * into link_reg (rd for JALR, 0 meaning "no link" for JR).
 */
static u32 int_jumpr(struct interpreter *inter, u8 link_reg)
{
	struct lightrec_state *state = inter->state;
	u32 old_pc, next_pc = state->native_reg_cache[inter->op->r.rs];

	if (link_reg) {
		old_pc = inter->block->pc + inter->op->offset * sizeof(u32);
		state->native_reg_cache[link_reg] = old_pc + 8;
	}

	if (inter->op->flags & LIGHTREC_NO_DS)
		return next_pc;

	return int_delay_slot(inter, next_pc, true);
}

static u32 int_special_JR(struct interpreter *inter)
{
	return int_jumpr(inter, 0);
}

static u32 int_special_JALR(struct interpreter *inter)
{
	return int_jumpr(inter, inter->op->r.rd);
}

/*
 * Finalize a taken branch. If the branch is local to the current block
 * (LIGHTREC_LOCAL_BRANCH, forward offset only), keep emulating inside the
 * same block instead of returning to the caller.
 */
static u32 int_do_branch(struct interpreter *inter, u32 old_pc, u32 next_pc)
{
	if (!inter->delay_slot &&
	    (inter->op->flags & LIGHTREC_LOCAL_BRANCH) &&
	    (s16)inter->op->c.i.imm >= 0) {
		next_pc = old_pc + ((1 + (s16)inter->op->c.i.imm) << 2);
		next_pc = lightrec_emulate_block(inter->block, next_pc);
	}

	return next_pc;
}
/*
 * Common tail for every conditional branch: compute the target address,
 * run (or skip) the delay slot, then dispatch to the taken / not-taken path.
 *
 * pc:     address of the branch opcode itself
 * code:   the branch opcode word
 * branch: whether the condition evaluated true
 */
static u32 int_branch(struct interpreter *inter, u32 pc,
		      union code code, bool branch)
{
	/* Branch target: PC of the delay slot + sign-extended offset * 4 */
	u32 next_pc = pc + 4 + ((s16)code.i.imm << 2);

	update_cycles_before_branch(inter);

	if (inter->op->flags & LIGHTREC_NO_DS) {
		/* Delay slot already accounted for by the optimizer */
		if (branch)
			return int_do_branch(inter, pc, next_pc);
		else
			return jump_next(inter);
	}

	if (!inter->delay_slot)
		next_pc = int_delay_slot(inter, next_pc, branch);

	if (branch)
		return int_do_branch(inter, pc, next_pc);

	if (inter->op->flags & LIGHTREC_EMULATE_BRANCH)
		return pc + 8;
	else
		return jump_after_branch(inter);
}

/* BEQ/BNE family: compare rs and rt; 'bne' inverts the equality test. */
static u32 int_beq(struct interpreter *inter, bool bne)
{
	u32 rs, rt, old_pc = inter->block->pc + inter->op->offset * sizeof(u32);

	rs = inter->state->native_reg_cache[inter->op->i.rs];
	rt = inter->state->native_reg_cache[inter->op->i.rt];

	return int_branch(inter, old_pc, inter->op->c, (rs == rt) ^ bne);
}

static u32 int_BEQ(struct interpreter *inter)
{
	return int_beq(inter, false);
}

static u32 int_BNE(struct interpreter *inter)
{
	return int_beq(inter, true);
}

/*
 * Shared handler for all compare-with-zero branches.
 * link:   write return address to $ra (BLTZAL/BGEZAL)
 * lt:     invert the condition ("less than" variants)
 * regimm: REGIMM encodings compare >= 0 / < 0; the non-regimm ones
 *         (BLEZ/BGTZ via the wrappers below) compare > 0 / <= 0.
 * Base condition: (regimm && rs == 0) || rs > 0, i.e. rs >= 0 for regimm,
 * rs > 0 otherwise; XOR with 'lt' flips it.
 */
static u32 int_bgez(struct interpreter *inter, bool link, bool lt, bool regimm)
{
	u32 old_pc = inter->block->pc + inter->op->offset * sizeof(u32);
	s32 rs;

	if (link)
		inter->state->native_reg_cache[31] = old_pc + 8;

	rs = (s32)inter->state->native_reg_cache[inter->op->i.rs];

	return int_branch(inter, old_pc, inter->op->c,
			  ((regimm && !rs) || rs > 0) ^ lt);
}

static u32 int_regimm_BLTZ(struct interpreter *inter)
{
	return int_bgez(inter, false, true, true);
}

static u32 int_regimm_BGEZ(struct interpreter *inter)
{
	return int_bgez(inter, false, false, true);
}

static u32 int_regimm_BLTZAL(struct interpreter *inter)
{
	return int_bgez(inter, true, true, true);
}

static u32 int_regimm_BGEZAL(struct interpreter *inter)
{
	return int_bgez(inter, true, false, true);
}

static u32 int_BLEZ(struct interpreter *inter)
{
	return int_bgez(inter, false, true, false);
}

static u32 int_BGTZ(struct interpreter *inter)
{
	return int_bgez(inter, false, false, false);
}

/* MFC0/CFC0/MFC2/CFC2: read a coprocessor register into rt. */
static u32 int_cfc(struct interpreter *inter)
{
	struct lightrec_state *state = inter->state;
	const struct opcode *op = inter->op;
	u32 val;

	val = lightrec_mfc(state, op->c);

	/* Writes to $zero are dropped */
	if (likely(op->r.rt))
		state->native_reg_cache[op->r.rt] = val;

	return jump_next(inter);
}

/* MTC0/CTC0/MTC2/CTC2: write rt into a coprocessor register. */
static u32 int_ctc(struct interpreter *inter)
{
	struct lightrec_state *state = inter->state;
	const struct opcode *op = inter->op;

	lightrec_mtc(state, op->c, state->native_reg_cache[op->r.rt]);

	/* If we have a MTC0 or CTC0 to CP0 register 12 (Status) or 13 (Cause),
	 * return early so that the emulator will be able to check software
	 * interrupt status. */
	if (op->i.op == OP_CP0 && (op->r.rd == 12 || op->r.rd == 13))
		return inter->block->pc + (op->offset + 1) * sizeof(u32);
	else
		return jump_next(inter);
}

/* RFE: restore the interrupt/mode bits of CP0 Status from the exception
 * stack (shift the KU/IE bit pairs right by one level). */
static u32 int_cp0_RFE(struct interpreter *inter)
{
	struct lightrec_state *state = inter->state;
	u32 status;

	/* Read CP0 Status register (r12) */
	status = state->ops.cop0_ops.mfc(state, 12);

	/* Switch the bits */
	status = ((status & 0x3c) >> 2) | (status & ~0xf);

	/* Write it back */
	state->ops.cop0_ops.ctc(state, 12, status);

	return jump_next(inter);
}

/* COPz "operation" opcode: forward to CP2 (GTE) if bit 25 is set,
 * otherwise to CP0, with bit 25 masked out of the command word. */
static u32 int_CP(struct interpreter *inter)
{
	struct lightrec_state *state = inter->state;
	const struct lightrec_cop_ops *ops;
	const struct opcode *op = inter->op;

	if ((op->j.imm >> 25) & 1)
		ops = &state->ops.cop2_ops;
	else
		ops = &state->ops.cop0_ops;

	(*ops->op)(state, (op->j.imm) & ~(1 << 25));

	return jump_next(inter);
}

/* ADDI/ADDIU: rt = rs + sign-extended immediate. Overflow traps of ADDI
 * are not modelled here (both map to this handler in int_standard). */
static u32 int_ADDI(struct interpreter *inter)
{
	u32 *reg_cache = inter->state->native_reg_cache;
	struct opcode_i *op = &inter->op->i;

	if (likely(op->rt))
		reg_cache[op->rt] = reg_cache[op->rs] + (s32)(s16)op->imm;

	return jump_next(inter);
}
(s32)(s16)op->imm; + + return jump_next(inter); +} + +static u32 int_SLTI(struct interpreter *inter) +{ + u32 *reg_cache = inter->state->native_reg_cache; + struct opcode_i *op = &inter->op->i; + + if (likely(op->rt)) + reg_cache[op->rt] = (s32)reg_cache[op->rs] < (s32)(s16)op->imm; + + return jump_next(inter); +} + +static u32 int_SLTIU(struct interpreter *inter) +{ + u32 *reg_cache = inter->state->native_reg_cache; + struct opcode_i *op = &inter->op->i; + + if (likely(op->rt)) + reg_cache[op->rt] = reg_cache[op->rs] < (u32)(s32)(s16)op->imm; + + return jump_next(inter); +} + +static u32 int_ANDI(struct interpreter *inter) +{ + u32 *reg_cache = inter->state->native_reg_cache; + struct opcode_i *op = &inter->op->i; + + if (likely(op->rt)) + reg_cache[op->rt] = reg_cache[op->rs] & op->imm; + + return jump_next(inter); +} + +static u32 int_ORI(struct interpreter *inter) +{ + u32 *reg_cache = inter->state->native_reg_cache; + struct opcode_i *op = &inter->op->i; + + if (likely(op->rt)) + reg_cache[op->rt] = reg_cache[op->rs] | op->imm; + + return jump_next(inter); +} + +static u32 int_XORI(struct interpreter *inter) +{ + u32 *reg_cache = inter->state->native_reg_cache; + struct opcode_i *op = &inter->op->i; + + if (likely(op->rt)) + reg_cache[op->rt] = reg_cache[op->rs] ^ op->imm; + + return jump_next(inter); +} + +static u32 int_LUI(struct interpreter *inter) +{ + struct opcode_i *op = &inter->op->i; + + inter->state->native_reg_cache[op->rt] = op->imm << 16; + + return jump_next(inter); +} + +static u32 int_io(struct interpreter *inter, bool is_load) +{ + struct opcode_i *op = &inter->op->i; + u32 *reg_cache = inter->state->native_reg_cache; + u32 val; + + val = lightrec_rw(inter->state, inter->op->c, + reg_cache[op->rs], reg_cache[op->rt], + &inter->op->flags); + + if (is_load && op->rt) + reg_cache[op->rt] = val; + + return jump_next(inter); +} + +static u32 int_load(struct interpreter *inter) +{ + return int_io(inter, true); +} + +static u32 int_store(struct 
interpreter *inter) +{ + u32 next_pc; + + if (likely(!(inter->op->flags & LIGHTREC_SMC))) + return int_io(inter, false); + + lightrec_rw(inter->state, inter->op->c, + inter->state->native_reg_cache[inter->op->i.rs], + inter->state->native_reg_cache[inter->op->i.rt], + &inter->op->flags); + + next_pc = inter->block->pc + (inter->op->offset + 1) * 4; + + /* Invalidate next PC, to force the rest of the block to be rebuilt */ + lightrec_invalidate(inter->state, next_pc, 4); + + return next_pc; +} + +static u32 int_LWC2(struct interpreter *inter) +{ + return int_io(inter, false); +} + +static u32 int_special_SLL(struct interpreter *inter) +{ + struct opcode *op = inter->op; + u32 rt; + + if (op->opcode) { /* Handle NOPs */ + rt = inter->state->native_reg_cache[op->r.rt]; + inter->state->native_reg_cache[op->r.rd] = rt << op->r.imm; + } + + return jump_next(inter); +} + +static u32 int_special_SRL(struct interpreter *inter) +{ + struct opcode *op = inter->op; + u32 rt = inter->state->native_reg_cache[op->r.rt]; + + inter->state->native_reg_cache[op->r.rd] = rt >> op->r.imm; + + return jump_next(inter); +} + +static u32 int_special_SRA(struct interpreter *inter) +{ + struct opcode *op = inter->op; + s32 rt = inter->state->native_reg_cache[op->r.rt]; + + inter->state->native_reg_cache[op->r.rd] = rt >> op->r.imm; + + return jump_next(inter); +} + +static u32 int_special_SLLV(struct interpreter *inter) +{ + struct opcode *op = inter->op; + u32 rs = inter->state->native_reg_cache[op->r.rs]; + u32 rt = inter->state->native_reg_cache[op->r.rt]; + + inter->state->native_reg_cache[op->r.rd] = rt << (rs & 0x1f); + + return jump_next(inter); +} + +static u32 int_special_SRLV(struct interpreter *inter) +{ + struct opcode *op = inter->op; + u32 rs = inter->state->native_reg_cache[op->r.rs]; + u32 rt = inter->state->native_reg_cache[op->r.rt]; + + inter->state->native_reg_cache[op->r.rd] = rt >> (rs & 0x1f); + + return jump_next(inter); +} + +static u32 int_special_SRAV(struct 
interpreter *inter) +{ + struct opcode *op = inter->op; + u32 rs = inter->state->native_reg_cache[op->r.rs]; + s32 rt = inter->state->native_reg_cache[op->r.rt]; + + inter->state->native_reg_cache[op->r.rd] = rt >> (rs & 0x1f); + + return jump_next(inter); +} + +static u32 int_syscall_break(struct interpreter *inter) +{ + + if (inter->op->r.op == OP_SPECIAL_BREAK) + inter->state->exit_flags |= LIGHTREC_EXIT_BREAK; + else + inter->state->exit_flags |= LIGHTREC_EXIT_SYSCALL; + + return inter->block->pc + inter->op->offset * sizeof(u32); +} + +static u32 int_special_MFHI(struct interpreter *inter) +{ + u32 *reg_cache = inter->state->native_reg_cache; + struct opcode_r *op = &inter->op->r; + + if (likely(op->rd)) + reg_cache[op->rd] = reg_cache[REG_HI]; + + return jump_next(inter); +} + +static u32 int_special_MTHI(struct interpreter *inter) +{ + u32 *reg_cache = inter->state->native_reg_cache; + + reg_cache[REG_HI] = reg_cache[inter->op->r.rs]; + + return jump_next(inter); +} + +static u32 int_special_MFLO(struct interpreter *inter) +{ + u32 *reg_cache = inter->state->native_reg_cache; + struct opcode_r *op = &inter->op->r; + + if (likely(op->rd)) + reg_cache[op->rd] = reg_cache[REG_LO]; + + return jump_next(inter); +} + +static u32 int_special_MTLO(struct interpreter *inter) +{ + u32 *reg_cache = inter->state->native_reg_cache; + + reg_cache[REG_LO] = reg_cache[inter->op->r.rs]; + + return jump_next(inter); +} + +static u32 int_special_MULT(struct interpreter *inter) +{ + u32 *reg_cache = inter->state->native_reg_cache; + s32 rs = reg_cache[inter->op->r.rs]; + s32 rt = reg_cache[inter->op->r.rt]; + u64 res = (s64)rs * (s64)rt; + + if (!(inter->op->flags & LIGHTREC_MULT32)) + reg_cache[REG_HI] = res >> 32; + reg_cache[REG_LO] = res; + + return jump_next(inter); +} + +static u32 int_special_MULTU(struct interpreter *inter) +{ + u32 *reg_cache = inter->state->native_reg_cache; + u32 rs = reg_cache[inter->op->r.rs]; + u32 rt = reg_cache[inter->op->r.rt]; + u64 res = 
(u64)rs * (u64)rt; + + if (!(inter->op->flags & LIGHTREC_MULT32)) + reg_cache[REG_HI] = res >> 32; + reg_cache[REG_LO] = res; + + return jump_next(inter); +} + +static u32 int_special_DIV(struct interpreter *inter) +{ + u32 *reg_cache = inter->state->native_reg_cache; + s32 rs = reg_cache[inter->op->r.rs]; + s32 rt = reg_cache[inter->op->r.rt]; + u32 lo, hi; + + if (rt == 0) { + hi = rs; + lo = (rs < 0) * 2 - 1; + } else { + lo = rs / rt; + hi = rs % rt; + } + + reg_cache[REG_HI] = hi; + reg_cache[REG_LO] = lo; + + return jump_next(inter); +} + +static u32 int_special_DIVU(struct interpreter *inter) +{ + u32 *reg_cache = inter->state->native_reg_cache; + u32 rs = reg_cache[inter->op->r.rs]; + u32 rt = reg_cache[inter->op->r.rt]; + u32 lo, hi; + + if (rt == 0) { + hi = rs; + lo = (u32)-1; + } else { + lo = rs / rt; + hi = rs % rt; + } + + reg_cache[REG_HI] = hi; + reg_cache[REG_LO] = lo; + + return jump_next(inter); +} + +static u32 int_special_ADD(struct interpreter *inter) +{ + u32 *reg_cache = inter->state->native_reg_cache; + struct opcode_r *op = &inter->op->r; + s32 rs = reg_cache[op->rs]; + s32 rt = reg_cache[op->rt]; + + if (likely(op->rd)) + reg_cache[op->rd] = rs + rt; + + return jump_next(inter); +} + +static u32 int_special_SUB(struct interpreter *inter) +{ + u32 *reg_cache = inter->state->native_reg_cache; + struct opcode_r *op = &inter->op->r; + u32 rs = reg_cache[op->rs]; + u32 rt = reg_cache[op->rt]; + + if (likely(op->rd)) + reg_cache[op->rd] = rs - rt; + + return jump_next(inter); +} + +static u32 int_special_AND(struct interpreter *inter) +{ + u32 *reg_cache = inter->state->native_reg_cache; + struct opcode_r *op = &inter->op->r; + u32 rs = reg_cache[op->rs]; + u32 rt = reg_cache[op->rt]; + + if (likely(op->rd)) + reg_cache[op->rd] = rs & rt; + + return jump_next(inter); +} + +static u32 int_special_OR(struct interpreter *inter) +{ + u32 *reg_cache = inter->state->native_reg_cache; + struct opcode_r *op = &inter->op->r; + u32 rs = 
/* XOR: rd = rs ^ rt. $zero writes dropped. */
static u32 int_special_XOR(struct interpreter *inter)
{
	u32 *reg_cache = inter->state->native_reg_cache;
	struct opcode_r *op = &inter->op->r;
	u32 rs = reg_cache[op->rs];
	u32 rt = reg_cache[op->rt];

	if (likely(op->rd))
		reg_cache[op->rd] = rs ^ rt;

	return jump_next(inter);
}

/* NOR: rd = ~(rs | rt). */
static u32 int_special_NOR(struct interpreter *inter)
{
	u32 *reg_cache = inter->state->native_reg_cache;
	struct opcode_r *op = &inter->op->r;
	u32 rs = reg_cache[op->rs];
	u32 rt = reg_cache[op->rt];

	if (likely(op->rd))
		reg_cache[op->rd] = ~(rs | rt);

	return jump_next(inter);
}

/* SLT: signed set-on-less-than. */
static u32 int_special_SLT(struct interpreter *inter)
{
	u32 *reg_cache = inter->state->native_reg_cache;
	struct opcode_r *op = &inter->op->r;
	s32 rs = reg_cache[op->rs];
	s32 rt = reg_cache[op->rt];

	if (likely(op->rd))
		reg_cache[op->rd] = rs < rt;

	return jump_next(inter);
}

/* SLTU: unsigned set-on-less-than. */
static u32 int_special_SLTU(struct interpreter *inter)
{
	u32 *reg_cache = inter->state->native_reg_cache;
	struct opcode_r *op = &inter->op->r;
	u32 rs = reg_cache[op->rs];
	u32 rt = reg_cache[op->rt];

	if (likely(op->rd))
		reg_cache[op->rd] = rs < rt;

	return jump_next(inter);
}

/* Meta opcode: no-op at emulation time, just skip over it. */
static u32 int_META_SKIP(struct interpreter *inter)
{
	return jump_skip(inter);
}

/* Meta opcode: plain register move, rd = rs. */
static u32 int_META_MOV(struct interpreter *inter)
{
	u32 *reg_cache = inter->state->native_reg_cache;
	struct opcode_r *op = &inter->op->r;

	if (likely(op->rd))
		reg_cache[op->rd] = reg_cache[op->rs];

	return jump_next(inter);
}

/* Meta opcode: flush the locally accumulated cycle count into the
 * global cycle counter. */
static u32 int_META_SYNC(struct interpreter *inter)
{
	inter->state->current_cycle += inter->cycles;
	inter->cycles = 0;

	return jump_skip(inter);
}

/* Primary dispatch table, indexed by the 6-bit major opcode.
 * Unset entries fall back to int_unimplemented via execute(). */
static const lightrec_int_func_t int_standard[64] = {
	[OP_SPECIAL]		= int_SPECIAL,
	[OP_REGIMM]		= int_REGIMM,
	[OP_J]			= int_J,
	[OP_JAL]		= int_JAL,
	[OP_BEQ]		= int_BEQ,
	[OP_BNE]		= int_BNE,
	[OP_BLEZ]		= int_BLEZ,
	[OP_BGTZ]		= int_BGTZ,
	[OP_ADDI]		= int_ADDI,
	[OP_ADDIU]		= int_ADDI,
	[OP_SLTI]		= int_SLTI,
	[OP_SLTIU]		= int_SLTIU,
	[OP_ANDI]		= int_ANDI,
	[OP_ORI]		= int_ORI,
	[OP_XORI]		= int_XORI,
	[OP_LUI]		= int_LUI,
	[OP_CP0]		= int_CP0,
	[OP_CP2]		= int_CP2,
	[OP_LB]			= int_load,
	[OP_LH]			= int_load,
	[OP_LWL]		= int_load,
	[OP_LW]			= int_load,
	[OP_LBU]		= int_load,
	[OP_LHU]		= int_load,
	[OP_LWR]		= int_load,
	[OP_SB]			= int_store,
	[OP_SH]			= int_store,
	[OP_SWL]		= int_store,
	[OP_SW]			= int_store,
	[OP_SWR]		= int_store,
	[OP_LWC2]		= int_LWC2,
	[OP_SWC2]		= int_store,

	[OP_META_REG_UNLOAD]	= int_META_SKIP,
	[OP_META_BEQZ]		= int_BEQ,
	[OP_META_BNEZ]		= int_BNE,
	[OP_META_MOV]		= int_META_MOV,
	[OP_META_SYNC]		= int_META_SYNC,
};

/* SPECIAL sub-table, indexed by the 6-bit function field. */
static const lightrec_int_func_t int_special[64] = {
	[OP_SPECIAL_SLL]	= int_special_SLL,
	[OP_SPECIAL_SRL]	= int_special_SRL,
	[OP_SPECIAL_SRA]	= int_special_SRA,
	[OP_SPECIAL_SLLV]	= int_special_SLLV,
	[OP_SPECIAL_SRLV]	= int_special_SRLV,
	[OP_SPECIAL_SRAV]	= int_special_SRAV,
	[OP_SPECIAL_JR]		= int_special_JR,
	[OP_SPECIAL_JALR]	= int_special_JALR,
	[OP_SPECIAL_SYSCALL]	= int_syscall_break,
	[OP_SPECIAL_BREAK]	= int_syscall_break,
	[OP_SPECIAL_MFHI]	= int_special_MFHI,
	[OP_SPECIAL_MTHI]	= int_special_MTHI,
	[OP_SPECIAL_MFLO]	= int_special_MFLO,
	[OP_SPECIAL_MTLO]	= int_special_MTLO,
	[OP_SPECIAL_MULT]	= int_special_MULT,
	[OP_SPECIAL_MULTU]	= int_special_MULTU,
	[OP_SPECIAL_DIV]	= int_special_DIV,
	[OP_SPECIAL_DIVU]	= int_special_DIVU,
	[OP_SPECIAL_ADD]	= int_special_ADD,
	[OP_SPECIAL_ADDU]	= int_special_ADD,
	[OP_SPECIAL_SUB]	= int_special_SUB,
	[OP_SPECIAL_SUBU]	= int_special_SUB,
	[OP_SPECIAL_AND]	= int_special_AND,
	[OP_SPECIAL_OR]		= int_special_OR,
	[OP_SPECIAL_XOR]	= int_special_XOR,
	[OP_SPECIAL_NOR]	= int_special_NOR,
	[OP_SPECIAL_SLT]	= int_special_SLT,
	[OP_SPECIAL_SLTU]	= int_special_SLTU,
};

/* REGIMM sub-table, indexed by the rt field. */
static const lightrec_int_func_t int_regimm[64] = {
	[OP_REGIMM_BLTZ]	= int_regimm_BLTZ,
	[OP_REGIMM_BGEZ]	= int_regimm_BGEZ,
	[OP_REGIMM_BLTZAL]	= int_regimm_BLTZAL,
	[OP_REGIMM_BGEZAL]	= int_regimm_BGEZAL,
};

/* CP0 sub-table, indexed by the rs field. */
static const lightrec_int_func_t int_cp0[64] = {
	[OP_CP0_MFC0]		= int_cfc,
	[OP_CP0_CFC0]		= int_cfc,
	[OP_CP0_MTC0]		= int_ctc,
	[OP_CP0_CTC0]		= int_ctc,
	[OP_CP0_RFE]		= int_cp0_RFE,
};

/* CP2 "basic" sub-table, indexed by the rs field. */
static const lightrec_int_func_t int_cp2_basic[64] = {
	[OP_CP2_BASIC_MFC2]	= int_cfc,
	[OP_CP2_BASIC_CFC2]	= int_cfc,
	[OP_CP2_BASIC_MTC2]	= int_ctc,
	[OP_CP2_BASIC_CTC2]	= int_ctc,
};

/* Dispatch a SPECIAL opcode; unknown functions warn and are skipped. */
static u32 int_SPECIAL(struct interpreter *inter)
{
	lightrec_int_func_t f = int_special[inter->op->r.op];
	if (likely(f))
		return execute(f, inter);
	else
		return int_unimplemented(inter);
}

/* Dispatch a REGIMM opcode; unknown encodings warn and are skipped. */
static u32 int_REGIMM(struct interpreter *inter)
{
	lightrec_int_func_t f = int_regimm[inter->op->r.rt];
	if (likely(f))
		return execute(f, inter);
	else
		return int_unimplemented(inter);
}

/* Dispatch a CP0 opcode; unhandled ones go to the generic coprocessor
 * operation handler instead of int_unimplemented. */
static u32 int_CP0(struct interpreter *inter)
{
	lightrec_int_func_t f = int_cp0[inter->op->r.rs];
	if (likely(f))
		return execute(f, inter);
	else
		return int_CP(inter);
}

/* Dispatch a CP2 opcode: MFC2/CFC2/MTC2/CTC2 via the basic table,
 * everything else is a GTE command forwarded to int_CP. */
static u32 int_CP2(struct interpreter *inter)
{
	if (inter->op->r.op == OP_CP2_BASIC) {
		lightrec_int_func_t f = int_cp2_basic[inter->op->r.rs];
		if (likely(f))
			return execute(f, inter);
	}

	return int_CP(inter);
}

static u32 lightrec_int_op(struct interpreter *inter)
{
	return execute(int_standard[inter->op->i.op], inter);
}

/* Interpret the block starting at opcode 'op'; returns the PC of the next
 * block and accounts the consumed cycles into the state. */
static u32 lightrec_emulate_block_list(struct block *block, struct opcode *op)
{
	struct interpreter inter;
	u32 pc;

	inter.block = block;
	inter.state = block->state;
	inter.op = op;
	inter.cycles = 0;
	inter.delay_slot = false;

	pc = lightrec_int_op(&inter);

	/* Add the cycles of the last branch */
	inter.cycles += lightrec_cycles_of_opcode(inter.op->c);

	block->state->current_cycle += inter.cycles;

	return pc;
}

/* Public entry point: interpret 'block' starting at address 'pc'.
 * The opcode list is scanned for the entry whose offset matches pc;
 * returns 0 if pc does not belong to the block. */
u32 lightrec_emulate_block(struct block *block, u32 pc)
{
	u32 offset = (kunseg(pc) - kunseg(block->pc)) >> 2;
	struct opcode *op;

	for (op = block->opcode_list;
	     op && (op->offset < offset); op = op->next);
	if (op)
		return lightrec_emulate_block_list(block, op);

	pr_err("PC 0x%x is outside block at PC 0x%x\n", pc, block->pc);

	return 0;
}
/* --- deps/lightrec/interpreter.h --- */
/*
 * Copyright (C) 2019 Paul Cercueil <paul@crapouillou.net>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 */

#ifndef __LIGHTREC_INTERPRETER_H__
#define __LIGHTREC_INTERPRETER_H__

#include "lightrec.h"

struct block;

/* Run 'block' with the interpreter, starting at address 'pc';
 * returns the PC of the next block to execute. */
u32 lightrec_emulate_block(struct block *block, u32 pc);

#endif /* __LIGHTREC_INTERPRETER_H__ */

/* --- deps/lightrec/lightrec-private.h --- */
/*
 * Copyright (C) 2016 Paul Cercueil <paul@crapouillou.net>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 */

#ifndef __LIGHTREC_PRIVATE_H__
#define __LIGHTREC_PRIVATE_H__

#include "config.h"
#include "disassembler.h"
#include "lightrec.h"

#if ENABLE_THREADED_COMPILER
#include <stdatomic.h>
#endif

/* sizeof(x) ? : guards against a zero-sized array dividing by zero */
#define ARRAY_SIZE(x) (sizeof(x) ? sizeof(x) / sizeof((x)[0]) : 0)
#define BIT(x) (1 << (x))

#ifdef __GNUC__
#	define likely(x)       __builtin_expect(!!(x),1)
#	define unlikely(x)     __builtin_expect(!!(x),0)
#else
#	define likely(x)       (x)
#	define unlikely(x)     (x)
#endif

/* The PSX is little-endian; swap only when the host is big-endian. */
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#	define LE32TOH(x)	__builtin_bswap32(x)
#	define HTOLE32(x)	__builtin_bswap32(x)
#	define LE16TOH(x)	__builtin_bswap16(x)
#	define HTOLE16(x)	__builtin_bswap16(x)
#else
#	define LE32TOH(x)	(x)
#	define HTOLE32(x)	(x)
#	define LE16TOH(x)	(x)
#	define HTOLE16(x)	(x)
#endif

/* Flags for (struct block *)->flags */
#define BLOCK_NEVER_COMPILE	BIT(0)
#define BLOCK_SHOULD_RECOMPILE	BIT(1)
#define BLOCK_FULLY_TAGGED	BIT(2)

#define RAM_SIZE	0x200000
#define BIOS_SIZE	0x80000

/* One code-LUT slot per 32-bit word of RAM + BIOS */
#define CODE_LUT_SIZE	((RAM_SIZE + BIOS_SIZE) >> 2)

/* Definition of jit_state_t (avoids inclusion of <lightning.h>) */
struct jit_node;
struct jit_state;
typedef struct jit_state jit_state_t;

struct blockcache;
struct recompiler;
struct regcache;
struct opcode;
struct tinymm;

/* One translated (or translatable) unit of PSX code. */
struct block {
	jit_state_t *_jit;
	struct lightrec_state *state;
	struct opcode *opcode_list;
	void (*function)(void);
	u32 pc;
	u32 hash;	/* checksum of the source opcodes, for outdated-block detection
			 * — presumably; computed elsewhere, confirm in lightrec.c */
#if ENABLE_THREADED_COMPILER
	atomic_flag op_list_freed;
#endif
	unsigned int code_size;
	u16 flags;	/* BLOCK_* flags above */
	u16 nb_ops;
	const struct lightrec_mem_map *map;
	struct block *next;
};

/* A pending forward branch inside the JIT buffer. */
struct lightrec_branch {
	struct jit_node *branch;
	u32 target;
};

/* A label inside the JIT buffer, addressed by block offset. */
struct lightrec_branch_target {
	struct jit_node *label;
	u32 offset;
};

/* Global emulator state. native_reg_cache mirrors the 32 MIPS GPRs plus
 * HI/LO (34 entries). code_lut is a flexible array member sized at
 * allocation time (CODE_LUT_SIZE entries). */
struct lightrec_state {
	u32 native_reg_cache[34];
	u32 next_pc;
	u32 current_cycle;
	u32 target_cycle;
	u32 exit_flags;
	struct block *dispatcher, *rw_wrapper, *rw_generic_wrapper,
		     *mfc_wrapper, *mtc_wrapper, *rfe_wrapper, *cp_wrapper,
		     *syscall_wrapper, *break_wrapper;
	void *rw_func, *rw_generic_func, *mfc_func, *mtc_func, *rfe_func,
	     *cp_func, *syscall_func, *break_func;
	struct jit_node *branches[512];
	struct lightrec_branch local_branches[512];
	struct lightrec_branch_target targets[512];
	unsigned int nb_branches;
	unsigned int nb_local_branches;
	unsigned int nb_targets;
	struct tinymm *tinymm;
	struct blockcache *block_cache;
	struct regcache *reg_cache;
	struct recompiler *rec;
	void (*eob_wrapper_func)(void);
	void (*get_next_block)(void);
	struct lightrec_ops ops;
	unsigned int cycles;
	unsigned int nb_maps;
	const struct lightrec_mem_map *maps;
	uintptr_t offset_ram, offset_bios, offset_scratch;
	_Bool mirrors_mapped;
	_Bool invalidate_from_dma_only;
	void *code_lut[];
};

/* Perform the memory access described by 'op' at rs+imm; returns the
 * loaded value for loads, 0 for stores. 'flags' (may be NULL) receives
 * LIGHTREC_HW_IO / LIGHTREC_DIRECT_IO tagging. */
u32 lightrec_rw(struct lightrec_state *state, union code op,
		u32 addr, u32 data, u16 *flags);

void lightrec_free_block(struct block *block);

void remove_from_code_lut(struct blockcache *cache, struct block *block);

/* Strip the KSEG0/KSEG1 segment bits to get a physical-ish address. */
static inline u32 kunseg(u32 addr)
{
	if (unlikely(addr >= 0xa0000000))
		return addr - 0xa0000000;
	else
		return addr &~ 0x80000000;
}

/* Map an (unsegmented) PC to its slot in the code LUT. */
static inline u32 lut_offset(u32 pc)
{
	if (pc & BIT(28))
		return ((pc & (BIOS_SIZE - 1)) + RAM_SIZE) >> 2; // BIOS
	else
		return (pc & (RAM_SIZE - 1)) >> 2; // RAM
}

void lightrec_mtc(struct lightrec_state *state, union code op, u32 data);
u32 lightrec_mfc(struct lightrec_state *state, union code op);

union code lightrec_read_opcode(struct lightrec_state *state, u32 pc);

struct block * lightrec_get_block(struct lightrec_state *state, u32 pc);
int lightrec_compile_block(struct block *block);

#endif /* __LIGHTREC_PRIVATE_H__ */
/* --- deps/lightrec/lightrec.c --- */
/*
 * Copyright (C) 2014-2020 Paul Cercueil <paul@crapouillou.net>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 */

#include "blockcache.h"
#include "config.h"
#include "debug.h"
#include "disassembler.h"
#include "emitter.h"
#include "interpreter.h"
#include "lightrec.h"
#include "memmanager.h"
#include "recompiler.h"
#include "regcache.h"
#include "optimizer.h"

#include <errno.h>
#include <lightning.h>
#include <limits.h>
#if ENABLE_THREADED_COMPILER
#include <stdatomic.h>
#endif
#include <stdbool.h>
#include <stddef.h>
#include <string.h>
#if ENABLE_TINYMM
#include <tinymm.h>
#endif

/* Bit mask with bits h..l (inclusive) set */
#define GENMASK(h, l) \
	(((uintptr_t)-1 << (l)) & ((uintptr_t)-1 >> (__WORDSIZE - 1 - (h))))

static struct block * lightrec_precompile_block(struct lightrec_state *state,
						u32 pc);

/* Report an access outside of every memory map and flag the exit. */
static void __segfault_cb(struct lightrec_state *state, u32 addr)
{
	lightrec_set_exit_flags(state, LIGHTREC_EXIT_SEGFAULT);
	pr_err("Segmentation fault in recompiled code: invalid "
	       "load/store at address 0x%08x\n", addr);
}

/* Memory access through a map's custom I/O callbacks (hardware registers).
 * NOTE(review): SWL/SWR degrade to a full-word sw here, and SWC2/LWC2 fall
 * through to the default lw case — presumably acceptable for HW I/O ranges;
 * confirm against the callers. */
static u32 lightrec_rw_ops(struct lightrec_state *state, union code op,
		const struct lightrec_mem_map_ops *ops, u32 addr, u32 data)
{
	switch (op.i.op) {
	case OP_SB:
		ops->sb(state, addr, (u8) data);
		return 0;
	case OP_SH:
		ops->sh(state, addr, (u16) data);
		return 0;
	case OP_SWL:
	case OP_SWR:
	case OP_SW:
		ops->sw(state, addr, data);
		return 0;
	case OP_LB:
		return (s32) (s8) ops->lb(state, addr);
	case OP_LBU:
		return ops->lb(state, addr);
	case OP_LH:
		return (s32) (s16) ops->lh(state, addr);
	case OP_LHU:
		return ops->lh(state, addr);
	case OP_LW:
	default:
		return ops->lw(state, addr);
	}
}

/* Drop the code-LUT entry covering 'addr' when a write hits RAM, so the
 * corresponding block gets recompiled. */
static void lightrec_invalidate_map(struct lightrec_state *state,
		const struct lightrec_mem_map *map, u32 addr)
{
	if (map == &state->maps[PSX_MAP_KERNEL_USER_RAM])
		state->code_lut[lut_offset(addr)] = NULL;
}

/* Linear scan of the memory maps for the one containing 'kaddr';
 * returns NULL if no map matches. */
static const struct lightrec_mem_map *
lightrec_get_map(struct lightrec_state *state, u32 kaddr)
{
	unsigned int i;

	for (i = 0; i < state->nb_maps; i++) {
		const struct lightrec_mem_map *map = &state->maps[i];

		if (kaddr >= map->pc && kaddr < map->pc + map->length)
			return map;
	}

	return NULL;
}

/*
 * Central memory-access routine for all load/store opcodes.
 * addr is the base register value; the opcode's signed immediate is added
 * here. Dispatches to HW I/O callbacks or direct host memory, tagging the
 * opcode's flags accordingly. For loads, returns the loaded value
 * (LWL/LWR merge with 'data', the current rt value); stores return 0.
 */
u32 lightrec_rw(struct lightrec_state *state, union code op,
		u32 addr, u32 data, u16 *flags)
{
	const struct lightrec_mem_map *map;
	u32 shift, mem_data, mask, pc;
	uintptr_t new_addr;
	u32 kaddr;

	addr += (s16) op.i.imm;
	kaddr = kunseg(addr);

	map = lightrec_get_map(state, kaddr);
	if (!map) {
		__segfault_cb(state, addr);
		return 0;
	}

	pc = map->pc;

	if (unlikely(map->ops)) {
		if (flags)
			*flags |= LIGHTREC_HW_IO;

		return lightrec_rw_ops(state, op, map->ops, addr, data);
	}

	/* Collapse mirror maps onto the real backing map */
	while (map->mirror_of)
		map = map->mirror_of;

	if (flags)
		*flags |= LIGHTREC_DIRECT_IO;

	kaddr -= pc;
	new_addr = (uintptr_t) map->address + kaddr;

	switch (op.i.op) {
	case OP_SB:
		*(u8 *) new_addr = (u8) data;
		if (!state->invalidate_from_dma_only)
			lightrec_invalidate_map(state, map, kaddr);
		return 0;
	case OP_SH:
		*(u16 *) new_addr = HTOLE16((u16) data);
		if (!state->invalidate_from_dma_only)
			lightrec_invalidate_map(state, map, kaddr);
		return 0;
	case OP_SWL:
		/* Store the upper (4 - shift) bytes of rt into the word's
		 * low bytes (little-endian unaligned store, left part) */
		shift = kaddr & 3;
		mem_data = LE32TOH(*(u32 *)(new_addr & ~3));
		mask = GENMASK(31, (shift + 1) * 8);

		*(u32 *)(new_addr & ~3) = HTOLE32((data >> ((3 - shift) * 8))
						  | (mem_data & mask));
		if (!state->invalidate_from_dma_only)
			lightrec_invalidate_map(state, map, kaddr & ~0x3);
		return 0;
	case OP_SWR:
		/* Mirror of SWL: store rt's low bytes into the word's
		 * upper bytes */
		shift = kaddr & 3;
		mem_data = LE32TOH(*(u32 *)(new_addr & ~3));
		mask = (1 << (shift * 8)) - 1;

		*(u32 *)(new_addr & ~3) = HTOLE32((data << (shift * 8))
						  | (mem_data & mask));
		if (!state->invalidate_from_dma_only)
			lightrec_invalidate_map(state, map, kaddr & ~0x3);
		return 0;
	case OP_SW:
		*(u32 *) new_addr = HTOLE32(data);
		if (!state->invalidate_from_dma_only)
			lightrec_invalidate_map(state, map, kaddr);
		return 0;
	case OP_SWC2:
		/* Store a GTE data register to memory */
		*(u32 *) new_addr = HTOLE32(state->ops.cop2_ops.mfc(state,
								    op.i.rt));
		if (!state->invalidate_from_dma_only)
			lightrec_invalidate_map(state, map, kaddr);
		return 0;
	case OP_LB:
		return (s32) *(s8 *) new_addr;
	case OP_LBU:
		return *(u8 *) new_addr;
	case OP_LH:
		return (s32)(s16) LE16TOH(*(u16 *) new_addr);
	case OP_LHU:
		return LE16TOH(*(u16 *) new_addr);
	case OP_LWL:
		/* Merge the word's low bytes into rt's upper bytes */
		shift = kaddr & 3;
		mem_data = LE32TOH(*(u32 *)(new_addr & ~3));
		mask = (1 << (24 - shift * 8)) - 1;

		return (data & mask) | (mem_data << (24 - shift * 8));
	case OP_LWR:
		/* Merge the word's upper bytes into rt's low bytes */
		shift = kaddr & 3;
		mem_data = LE32TOH(*(u32 *)(new_addr & ~3));
		mask = GENMASK(31, 32 - shift * 8);

		return (data & mask) | (mem_data >> (shift * 8));
	case OP_LWC2:
		/* Load a word straight into a GTE data register */
		state->ops.cop2_ops.mtc(state, op.i.rt,
					LE32TOH(*(u32 *) new_addr));
		return 0;
	case OP_LW:
	default:
		return LE32TOH(*(u32 *) new_addr);
	}
}

/* Run lightrec_rw() with operands taken from the register cache, and
 * write the result back into rt for plain load opcodes. */
static void lightrec_rw_helper(struct lightrec_state *state,
			       union code op, u16 *flags)
{
	u32 ret = lightrec_rw(state, op,
			      state->native_reg_cache[op.i.rs],
			      state->native_reg_cache[op.i.rt], flags);

	switch (op.i.op) {
	case OP_LB:
	case OP_LBU:
	case OP_LH:
	case OP_LHU:
	case OP_LWL:
	case OP_LWR:
	case OP_LW:
		if (op.i.rt)
			state->native_reg_cache[op.i.rt] = ret;
	default: /* fall-through */
		break;
	}
}

/* C wrapper called from JIT-ed code for tagged memory accesses. */
static void lightrec_rw_cb(struct lightrec_state *state, union code op)
{
	lightrec_rw_helper(state, op, NULL);
}

/* Generic (untagged) access wrapper: also tags the opcode and flags the
 * block for recompilation the first time it gets tagged. */
static void lightrec_rw_generic_cb(struct lightrec_state *state,
				   struct opcode *op, struct block *block)
{
	bool was_tagged = op->flags & (LIGHTREC_HW_IO | LIGHTREC_DIRECT_IO);

	lightrec_rw_helper(state, op->c, &op->flags);

	if (!was_tagged) {
		pr_debug("Opcode of block at PC 0x%08x offset 0x%x has been "
			 "tagged - flag for recompilation\n",
			 block->pc, op->offset << 2);

		lightrec_mark_for_recompilation(state->block_cache, block);
	}
}

/* Read coprocessor register rd; picks CP0 vs CP2 and the data (mfc)
 * vs control (cfc) accessor from the opcode encoding. */
u32 lightrec_mfc(struct lightrec_state *state, union code op)
{
	bool is_cfc = (op.i.op == OP_CP0 && op.r.rs == OP_CP0_CFC0) ||
		      (op.i.op == OP_CP2 && op.r.rs == OP_CP2_BASIC_CFC2);
	u32 (*func)(struct lightrec_state *, u8);
	const struct lightrec_cop_ops *ops;

	if (op.i.op == OP_CP0)
		ops = &state->ops.cop0_ops;
	else
		ops = &state->ops.cop2_ops;

	if (is_cfc)
		func = ops->cfc;
	else
		func = ops->mfc;

	return (*func)(state, op.r.rd);
}

static void lightrec_mfc_cb(struct lightrec_state *state, union code op)
{
	u32 rt = lightrec_mfc(state, op);

	/* Writes to $zero are dropped */
	if (op.r.rt)
		state->native_reg_cache[op.r.rt] = rt;
}

/* Write 'data' to coprocessor register rd; CP0/CP2 and mtc/ctc selection
 * mirrors lightrec_mfc(). */
void lightrec_mtc(struct lightrec_state *state, union code op, u32 data)
{
	bool is_ctc = (op.i.op == OP_CP0 && op.r.rs == OP_CP0_CTC0) ||
		      (op.i.op == OP_CP2 && op.r.rs == OP_CP2_BASIC_CTC2);
	void (*func)(struct lightrec_state *, u8, u32);
	const struct lightrec_cop_ops *ops;

	if (op.i.op == OP_CP0)
		ops = &state->ops.cop0_ops;
	else
		ops = &state->ops.cop2_ops;

	if (is_ctc)
		func = ops->ctc;
	else
		func = ops->mtc;

	(*func)(state, op.r.rd, data);
}

static void lightrec_mtc_cb(struct lightrec_state *state, union code op)
{
	lightrec_mtc(state, op, state->native_reg_cache[op.r.rt]);
}

/* RFE wrapper for JIT-ed code; same bit shuffle as the interpreter's
 * int_cp0_RFE handler. */
static void lightrec_rfe_cb(struct lightrec_state *state, union code op)
{
	u32 status;

	/* Read CP0 Status register (r12) */
	status = state->ops.cop0_ops.mfc(state, 12);

	/* Switch the bits */
	status = ((status & 0x3c) >> 2) | (status & ~0xf);

	/* Write it back */
	state->ops.cop0_ops.ctc(state, 12, status);
}

/* Coprocessor "operation" wrapper: bit 25 selects CP2 (GTE) over CP0. */
static void lightrec_cp_cb(struct lightrec_state *state, union code op)
{
	void (*func)(struct lightrec_state *, u32);

	if ((op.opcode >> 25) & 1)
		func = state->ops.cop2_ops.op;
	else
		func = state->ops.cop0_ops.op;

	(*func)(state, op.opcode);
}

static void lightrec_syscall_cb(struct lightrec_state *state, union code op)
{
	lightrec_set_exit_flags(state, LIGHTREC_EXIT_SYSCALL);
}

static void lightrec_break_cb(struct lightrec_state *state, union code op)
{
	lightrec_set_exit_flags(state, LIGHTREC_EXIT_BREAK);
}

/* Look up (or create) the block covering 'pc'. An outdated cached block
 * is torn down and recompiled. Returns NULL on compilation failure, with
 * LIGHTREC_EXIT_SEGFAULT flagged. */
struct block * lightrec_get_block(struct lightrec_state *state, u32 pc)
{
	struct block *block = lightrec_find_block(state->block_cache, pc);

	if (block && lightrec_block_is_outdated(block)) {
		pr_debug("Block at PC 0x%08x is outdated!\n", block->pc);

		/* Make sure the recompiler isn't processing the block we'll
		 * destroy */
		if (ENABLE_THREADED_COMPILER)
			lightrec_recompiler_remove(state->rec, block);

		lightrec_unregister_block(state->block_cache, block);
		lightrec_free_block(block);
		block = NULL;
	}

	if (!block) {
		block = lightrec_precompile_block(state, pc);
		if (!block) {
			pr_err("Unable to recompile block at PC 0x%x\n", pc);
			lightrec_set_exit_flags(state, LIGHTREC_EXIT_SEGFAULT);
			return NULL;
		}

		lightrec_register_block(state->block_cache, block);
	}

	return block;
}
BLOCK_SHOULD_RECOMPILE; + + if (unlikely(should_recompile)) { + pr_debug("Block at PC 0x%08x should recompile" + " - freeing old code\n", pc); + + if (ENABLE_THREADED_COMPILER) + lightrec_recompiler_remove(state->rec, block); + + remove_from_code_lut(state->block_cache, block); + lightrec_unregister(MEM_FOR_CODE, block->code_size); + if (block->_jit) + _jit_destroy_state(block->_jit); + block->_jit = NULL; + block->function = NULL; + block->flags &= ~BLOCK_SHOULD_RECOMPILE; + } + + if (ENABLE_THREADED_COMPILER && likely(!should_recompile)) + func = lightrec_recompiler_run_first_pass(block, &pc); + else + func = block->function; + + if (likely(func)) + return func; + + /* Block wasn't compiled yet - run the interpreter */ + if (!ENABLE_THREADED_COMPILER && + ((ENABLE_FIRST_PASS && likely(!should_recompile)) || + unlikely(block->flags & BLOCK_NEVER_COMPILE))) + pc = lightrec_emulate_block(block, pc); + + if (likely(!(block->flags & BLOCK_NEVER_COMPILE))) { + /* Then compile it using the profiled data */ + if (ENABLE_THREADED_COMPILER) + lightrec_recompiler_add(state->rec, block); + else + lightrec_compile_block(block); + } + + if (state->exit_flags != LIGHTREC_EXIT_NORMAL || + state->current_cycle >= state->target_cycle) { + state->next_pc = pc; + return NULL; + } + } +} + +static s32 c_generic_function_wrapper(struct lightrec_state *state, + s32 cycles_delta, + void (*f)(struct lightrec_state *, + struct opcode *, + struct block *), + struct opcode *op, struct block *block) +{ + state->current_cycle = state->target_cycle - cycles_delta; + + (*f)(state, op, block); + + return state->target_cycle - state->current_cycle; +} + +static s32 c_function_wrapper(struct lightrec_state *state, s32 cycles_delta, + void (*f)(struct lightrec_state *, union code), + union code op) +{ + state->current_cycle = state->target_cycle - cycles_delta; + + (*f)(state, op); + + return state->target_cycle - state->current_cycle; +} + +static struct block * generate_wrapper(struct 
lightrec_state *state, + void *f, bool generic) +{ + struct block *block; + jit_state_t *_jit; + unsigned int i; + int stack_ptr; + jit_word_t code_size; + jit_node_t *to_tramp, *to_fn_epilog; + + block = lightrec_malloc(state, MEM_FOR_IR, sizeof(*block)); + if (!block) + goto err_no_mem; + + _jit = jit_new_state(); + if (!_jit) + goto err_free_block; + + jit_name("RW wrapper"); + jit_note(__FILE__, __LINE__); + + /* Wrapper entry point */ + jit_prolog(); + + stack_ptr = jit_allocai(sizeof(uintptr_t) * NUM_TEMPS); + + for (i = 0; i < NUM_TEMPS; i++) + jit_stxi(stack_ptr + i * sizeof(uintptr_t), JIT_FP, JIT_R(i)); + + /* Jump to the trampoline */ + to_tramp = jit_jmpi(); + + /* The trampoline will jump back here */ + to_fn_epilog = jit_label(); + + for (i = 0; i < NUM_TEMPS; i++) + jit_ldxi(JIT_R(i), JIT_FP, stack_ptr + i * sizeof(uintptr_t)); + + jit_ret(); + jit_epilog(); + + /* Trampoline entry point. + * The sole purpose of the trampoline is to cheese Lightning not to + * save/restore the callee-saved register LIGHTREC_REG_CYCLE, since we + * do want to return to the caller with this register modified. 
*/ + jit_prolog(); + jit_tramp(256); + jit_patch(to_tramp); + + jit_prepare(); + jit_pushargr(LIGHTREC_REG_STATE); + jit_pushargr(LIGHTREC_REG_CYCLE); + jit_pushargi((uintptr_t)f); + jit_pushargr(JIT_R0); + if (generic) { + jit_pushargr(JIT_R1); + jit_finishi(c_generic_function_wrapper); + } else { + jit_finishi(c_function_wrapper); + } + +#if __WORDSIZE == 64 + jit_retval_i(LIGHTREC_REG_CYCLE); +#else + jit_retval(LIGHTREC_REG_CYCLE); +#endif + + jit_patch_at(jit_jmpi(), to_fn_epilog); + jit_epilog(); + + block->state = state; + block->_jit = _jit; + block->function = jit_emit(); + block->opcode_list = NULL; + block->flags = 0; + block->nb_ops = 0; + + jit_get_code(&code_size); + lightrec_register(MEM_FOR_CODE, code_size); + + block->code_size = code_size; + + if (ENABLE_DISASSEMBLER) { + pr_debug("Wrapper block:\n"); + jit_disassemble(); + } + + jit_clear_state(); + return block; + +err_free_block: + lightrec_free(state, MEM_FOR_IR, sizeof(*block), block); +err_no_mem: + pr_err("Unable to compile wrapper: Out of memory\n"); + return NULL; +} + +static struct block * generate_dispatcher(struct lightrec_state *state) +{ + struct block *block; + jit_state_t *_jit; + jit_node_t *to_end, *to_end2, *to_c, *loop, *addr, *addr2; + unsigned int i; + u32 offset, ram_len; + jit_word_t code_size; + + block = lightrec_malloc(state, MEM_FOR_IR, sizeof(*block)); + if (!block) + goto err_no_mem; + + _jit = jit_new_state(); + if (!_jit) + goto err_free_block; + + jit_name("dispatcher"); + jit_note(__FILE__, __LINE__); + + jit_prolog(); + jit_frame(256); + + jit_getarg(JIT_R0, jit_arg()); +#if __WORDSIZE == 64 + jit_getarg_i(LIGHTREC_REG_CYCLE, jit_arg()); +#else + jit_getarg(LIGHTREC_REG_CYCLE, jit_arg()); +#endif + + /* Force all callee-saved registers to be pushed on the stack */ + for (i = 0; i < NUM_REGS; i++) + jit_movr(JIT_V(i), JIT_V(i)); + + /* Pass lightrec_state structure to blocks, using the last callee-saved + * register that Lightning provides */ + 
jit_movi(LIGHTREC_REG_STATE, (intptr_t) state); + + loop = jit_label(); + + /* Call the block's code */ + jit_jmpr(JIT_R0); + + /* The block will jump here, with the number of cycles remaining in + * LIGHTREC_REG_CYCLE */ + addr2 = jit_indirect(); + + /* Jump to end if state->target_cycle < state->current_cycle */ + to_end = jit_blei(LIGHTREC_REG_CYCLE, 0); + + /* Convert next PC to KUNSEG and avoid mirrors */ + ram_len = state->maps[PSX_MAP_KERNEL_USER_RAM].length; + jit_andi(JIT_R0, JIT_V0, 0x10000000 | (ram_len - 1)); + to_c = jit_bgei(JIT_R0, ram_len); + + /* Fast path: code is running from RAM, use the code LUT */ +#if __WORDSIZE == 64 + jit_lshi(JIT_R0, JIT_R0, 1); +#endif + jit_addr(JIT_R0, JIT_R0, LIGHTREC_REG_STATE); + jit_ldxi(JIT_R0, JIT_R0, offsetof(struct lightrec_state, code_lut)); + + /* If we get non-NULL, loop */ + jit_patch_at(jit_bnei(JIT_R0, 0), loop); + + /* Slow path: call C function get_next_block_func() */ + jit_patch(to_c); + + if (ENABLE_FIRST_PASS) { + /* We may call the interpreter - update state->current_cycle */ + jit_ldxi_i(JIT_R2, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, target_cycle)); + jit_subr(JIT_R1, JIT_R2, LIGHTREC_REG_CYCLE); + jit_stxi_i(offsetof(struct lightrec_state, current_cycle), + LIGHTREC_REG_STATE, JIT_R1); + } + + /* The code LUT will be set to this address when the block at the target + * PC has been preprocessed but not yet compiled by the threaded + * recompiler */ + addr = jit_indirect(); + + /* Get the next block */ + jit_prepare(); + jit_pushargr(LIGHTREC_REG_STATE); + jit_pushargr(JIT_V0); + jit_finishi(&get_next_block_func); + jit_retval(JIT_R0); + + if (ENABLE_FIRST_PASS) { + /* The interpreter may have updated state->current_cycle and + * state->target_cycle - recalc the delta */ + jit_ldxi_i(JIT_R1, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, current_cycle)); + jit_ldxi_i(JIT_R2, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, target_cycle)); + jit_subr(LIGHTREC_REG_CYCLE, 
JIT_R2, JIT_R1); + } + + /* If we get non-NULL, loop */ + jit_patch_at(jit_bnei(JIT_R0, 0), loop); + + to_end2 = jit_jmpi(); + + /* When exiting, the recompiled code will jump to that address */ + jit_note(__FILE__, __LINE__); + jit_patch(to_end); + + /* Store back the next_pc to the lightrec_state structure */ + offset = offsetof(struct lightrec_state, next_pc); + jit_stxi_i(offset, LIGHTREC_REG_STATE, JIT_V0); + + jit_patch(to_end2); + + jit_retr(LIGHTREC_REG_CYCLE); + jit_epilog(); + + block->state = state; + block->_jit = _jit; + block->function = jit_emit(); + block->opcode_list = NULL; + block->flags = 0; + block->nb_ops = 0; + + jit_get_code(&code_size); + lightrec_register(MEM_FOR_CODE, code_size); + + block->code_size = code_size; + + state->eob_wrapper_func = jit_address(addr2); + state->get_next_block = jit_address(addr); + + if (ENABLE_DISASSEMBLER) { + pr_debug("Dispatcher block:\n"); + jit_disassemble(); + } + + /* We're done! */ + jit_clear_state(); + return block; + +err_free_block: + lightrec_free(state, MEM_FOR_IR, sizeof(*block), block); +err_no_mem: + pr_err("Unable to compile dispatcher: Out of memory\n"); + return NULL; +} + +union code lightrec_read_opcode(struct lightrec_state *state, u32 pc) +{ + u32 addr, kunseg_pc = kunseg(pc); + const u32 *code; + const struct lightrec_mem_map *map = lightrec_get_map(state, kunseg_pc); + + addr = kunseg_pc - map->pc; + + while (map->mirror_of) + map = map->mirror_of; + + code = map->address + addr; + + return (union code) *code; +} + +static struct block * lightrec_precompile_block(struct lightrec_state *state, + u32 pc) +{ + struct opcode *list; + struct block *block; + const u32 *code; + u32 addr, kunseg_pc = kunseg(pc); + const struct lightrec_mem_map *map = lightrec_get_map(state, kunseg_pc); + unsigned int length; + + if (!map) + return NULL; + + addr = kunseg_pc - map->pc; + + while (map->mirror_of) + map = map->mirror_of; + + code = map->address + addr; + + block = lightrec_malloc(state, 
MEM_FOR_IR, sizeof(*block)); + if (!block) { + pr_err("Unable to recompile block: Out of memory\n"); + return NULL; + } + + list = lightrec_disassemble(state, code, &length); + if (!list) { + lightrec_free(state, MEM_FOR_IR, sizeof(*block), block); + return NULL; + } + + block->pc = pc; + block->state = state; + block->_jit = NULL; + block->function = NULL; + block->opcode_list = list; + block->map = map; + block->next = NULL; + block->flags = 0; + block->code_size = 0; +#if ENABLE_THREADED_COMPILER + block->op_list_freed = (atomic_flag)ATOMIC_FLAG_INIT; +#endif + block->nb_ops = length / sizeof(u32); + + lightrec_optimize(block); + + length = block->nb_ops * sizeof(u32); + + lightrec_register(MEM_FOR_MIPS_CODE, length); + + if (ENABLE_DISASSEMBLER) { + pr_debug("Disassembled block at PC: 0x%x\n", block->pc); + lightrec_print_disassembly(block, code, length); + } + + pr_debug("Block size: %lu opcodes\n", block->nb_ops); + + /* If the first opcode is an 'impossible' branch, never compile the + * block */ + if (list->flags & LIGHTREC_EMULATE_BRANCH) + block->flags |= BLOCK_NEVER_COMPILE; + + block->hash = lightrec_calculate_block_hash(block); + + return block; +} + +static bool lightrec_block_is_fully_tagged(struct block *block) +{ + struct opcode *op; + + for (op = block->opcode_list; op; op = op->next) { + /* Verify that all load/stores of the opcode list + * Check all loads/stores of the opcode list and mark the + * block as fully compiled if they all have been tagged. 
*/ + switch (op->c.i.op) { + case OP_LB: + case OP_LH: + case OP_LWL: + case OP_LW: + case OP_LBU: + case OP_LHU: + case OP_LWR: + case OP_SB: + case OP_SH: + case OP_SWL: + case OP_SW: + case OP_SWR: + case OP_LWC2: + case OP_SWC2: + if (!(op->flags & (LIGHTREC_DIRECT_IO | + LIGHTREC_HW_IO))) + return false; + default: /* fall-through */ + continue; + } + } + + return true; +} + +int lightrec_compile_block(struct block *block) +{ + struct lightrec_state *state = block->state; + bool op_list_freed = false, fully_tagged = false; + struct opcode *elm; + jit_state_t *_jit; + jit_node_t *start_of_block; + bool skip_next = false; + jit_word_t code_size; + unsigned int i, j; + u32 next_pc; + + fully_tagged = lightrec_block_is_fully_tagged(block); + if (fully_tagged) + block->flags |= BLOCK_FULLY_TAGGED; + + _jit = jit_new_state(); + if (!_jit) + return -ENOMEM; + + block->_jit = _jit; + + lightrec_regcache_reset(state->reg_cache); + state->cycles = 0; + state->nb_branches = 0; + state->nb_local_branches = 0; + state->nb_targets = 0; + + jit_prolog(); + jit_tramp(256); + + start_of_block = jit_label(); + + for (elm = block->opcode_list; elm; elm = elm->next) { + next_pc = block->pc + elm->offset * sizeof(u32); + + if (skip_next) { + skip_next = false; + continue; + } + + state->cycles += lightrec_cycles_of_opcode(elm->c); + + if (elm->flags & LIGHTREC_EMULATE_BRANCH) { + pr_debug("Branch at offset 0x%x will be emulated\n", + elm->offset << 2); + lightrec_emit_eob(block, elm, next_pc); + skip_next = !(elm->flags & LIGHTREC_NO_DS); + } else if (elm->opcode) { + lightrec_rec_opcode(block, elm, next_pc); + skip_next = has_delay_slot(elm->c) && + !(elm->flags & LIGHTREC_NO_DS); +#if _WIN32 + /* FIXME: GNU Lightning on Windows seems to use our + * mapped registers as temporaries. Until the actual bug + * is found and fixed, unconditionally mark our + * registers as live here. 
*/ + lightrec_regcache_mark_live(state->reg_cache, _jit); +#endif + } + } + + for (i = 0; i < state->nb_branches; i++) + jit_patch(state->branches[i]); + + for (i = 0; i < state->nb_local_branches; i++) { + struct lightrec_branch *branch = &state->local_branches[i]; + + pr_debug("Patch local branch to offset 0x%x\n", + branch->target << 2); + + if (branch->target == 0) { + jit_patch_at(branch->branch, start_of_block); + continue; + } + + for (j = 0; j < state->nb_targets; j++) { + if (state->targets[j].offset == branch->target) { + jit_patch_at(branch->branch, + state->targets[j].label); + break; + } + } + + if (j == state->nb_targets) + pr_err("Unable to find branch target\n"); + } + + jit_ldxi(JIT_R0, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, eob_wrapper_func)); + + jit_jmpr(JIT_R0); + + jit_ret(); + jit_epilog(); + + block->function = jit_emit(); + + /* Add compiled function to the LUT */ + state->code_lut[lut_offset(block->pc)] = block->function; + + jit_get_code(&code_size); + lightrec_register(MEM_FOR_CODE, code_size); + + block->code_size = code_size; + + if (ENABLE_DISASSEMBLER) { + pr_debug("Compiling block at PC: 0x%x\n", block->pc); + jit_disassemble(); + } + + jit_clear_state(); + +#if ENABLE_THREADED_COMPILER + if (fully_tagged) + op_list_freed = atomic_flag_test_and_set(&block->op_list_freed); +#endif + if (fully_tagged && !op_list_freed) { + pr_debug("Block PC 0x%08x is fully tagged" + " - free opcode list\n", block->pc); + lightrec_free_opcode_list(state, block->opcode_list); + block->opcode_list = NULL; + } + + return 0; +} + +u32 lightrec_execute(struct lightrec_state *state, u32 pc, u32 target_cycle) +{ + s32 (*func)(void *, s32) = (void *)state->dispatcher->function; + void *block_trace; + s32 cycles_delta; + + state->exit_flags = LIGHTREC_EXIT_NORMAL; + + /* Handle the cycle counter overflowing */ + if (unlikely(target_cycle < state->current_cycle)) + target_cycle = UINT_MAX; + + state->target_cycle = target_cycle; + + block_trace = 
get_next_block_func(state, pc); + if (block_trace) { + cycles_delta = state->target_cycle - state->current_cycle; + + cycles_delta = (*func)(block_trace, cycles_delta); + + state->current_cycle = state->target_cycle - cycles_delta; + } + + return state->next_pc; +} + +u32 lightrec_execute_one(struct lightrec_state *state, u32 pc) +{ + return lightrec_execute(state, pc, state->current_cycle); +} + +u32 lightrec_run_interpreter(struct lightrec_state *state, u32 pc) +{ + struct block *block = lightrec_get_block(state, pc); + if (!block) + return 0; + + state->exit_flags = LIGHTREC_EXIT_NORMAL; + + return lightrec_emulate_block(block, pc); +} + +void lightrec_free_block(struct block *block) +{ + lightrec_unregister(MEM_FOR_MIPS_CODE, block->nb_ops * sizeof(u32)); + if (block->opcode_list) + lightrec_free_opcode_list(block->state, block->opcode_list); + if (block->_jit) + _jit_destroy_state(block->_jit); + lightrec_unregister(MEM_FOR_CODE, block->code_size); + lightrec_free(block->state, MEM_FOR_IR, sizeof(*block), block); +} + +struct lightrec_state * lightrec_init(char *argv0, + const struct lightrec_mem_map *map, + size_t nb, + const struct lightrec_ops *ops) +{ + struct lightrec_state *state; + + /* Sanity-check ops */ + if (!ops || + !ops->cop0_ops.mfc || !ops->cop0_ops.cfc || !ops->cop0_ops.mtc || + !ops->cop0_ops.ctc || !ops->cop0_ops.op || + !ops->cop2_ops.mfc || !ops->cop2_ops.cfc || !ops->cop2_ops.mtc || + !ops->cop2_ops.ctc || !ops->cop2_ops.op) { + pr_err("Missing callbacks in lightrec_ops structure\n"); + return NULL; + } + + init_jit(argv0); + + state = calloc(1, sizeof(*state) + + sizeof(*state->code_lut) * CODE_LUT_SIZE); + if (!state) + goto err_finish_jit; + + lightrec_register(MEM_FOR_LIGHTREC, sizeof(*state) + + sizeof(*state->code_lut) * CODE_LUT_SIZE); + +#if ENABLE_TINYMM + state->tinymm = tinymm_init(malloc, free, 4096); + if (!state->tinymm) + goto err_free_state; +#endif + + state->block_cache = lightrec_blockcache_init(state); + if 
(!state->block_cache) + goto err_free_tinymm; + + state->reg_cache = lightrec_regcache_init(state); + if (!state->reg_cache) + goto err_free_block_cache; + + if (ENABLE_THREADED_COMPILER) { + state->rec = lightrec_recompiler_init(state); + if (!state->rec) + goto err_free_reg_cache; + } + + state->nb_maps = nb; + state->maps = map; + + memcpy(&state->ops, ops, sizeof(*ops)); + + state->dispatcher = generate_dispatcher(state); + if (!state->dispatcher) + goto err_free_recompiler; + + state->rw_generic_wrapper = generate_wrapper(state, + lightrec_rw_generic_cb, + true); + if (!state->rw_generic_wrapper) + goto err_free_dispatcher; + + state->rw_wrapper = generate_wrapper(state, lightrec_rw_cb, false); + if (!state->rw_wrapper) + goto err_free_generic_rw_wrapper; + + state->mfc_wrapper = generate_wrapper(state, lightrec_mfc_cb, false); + if (!state->mfc_wrapper) + goto err_free_rw_wrapper; + + state->mtc_wrapper = generate_wrapper(state, lightrec_mtc_cb, false); + if (!state->mtc_wrapper) + goto err_free_mfc_wrapper; + + state->rfe_wrapper = generate_wrapper(state, lightrec_rfe_cb, false); + if (!state->rfe_wrapper) + goto err_free_mtc_wrapper; + + state->cp_wrapper = generate_wrapper(state, lightrec_cp_cb, false); + if (!state->cp_wrapper) + goto err_free_rfe_wrapper; + + state->syscall_wrapper = generate_wrapper(state, lightrec_syscall_cb, + false); + if (!state->syscall_wrapper) + goto err_free_cp_wrapper; + + state->break_wrapper = generate_wrapper(state, lightrec_break_cb, + false); + if (!state->break_wrapper) + goto err_free_syscall_wrapper; + + state->rw_generic_func = state->rw_generic_wrapper->function; + state->rw_func = state->rw_wrapper->function; + state->mfc_func = state->mfc_wrapper->function; + state->mtc_func = state->mtc_wrapper->function; + state->rfe_func = state->rfe_wrapper->function; + state->cp_func = state->cp_wrapper->function; + state->syscall_func = state->syscall_wrapper->function; + state->break_func = state->break_wrapper->function; + + 
map = &state->maps[PSX_MAP_BIOS]; + state->offset_bios = (uintptr_t)map->address - map->pc; + + map = &state->maps[PSX_MAP_SCRATCH_PAD]; + state->offset_scratch = (uintptr_t)map->address - map->pc; + + map = &state->maps[PSX_MAP_KERNEL_USER_RAM]; + state->offset_ram = (uintptr_t)map->address - map->pc; + + if (state->maps[PSX_MAP_MIRROR1].address == map->address + 0x200000 && + state->maps[PSX_MAP_MIRROR2].address == map->address + 0x400000 && + state->maps[PSX_MAP_MIRROR3].address == map->address + 0x600000) + state->mirrors_mapped = true; + + return state; + +err_free_syscall_wrapper: + lightrec_free_block(state->syscall_wrapper); +err_free_cp_wrapper: + lightrec_free_block(state->cp_wrapper); +err_free_rfe_wrapper: + lightrec_free_block(state->rfe_wrapper); +err_free_mtc_wrapper: + lightrec_free_block(state->mtc_wrapper); +err_free_mfc_wrapper: + lightrec_free_block(state->mfc_wrapper); +err_free_rw_wrapper: + lightrec_free_block(state->rw_wrapper); +err_free_generic_rw_wrapper: + lightrec_free_block(state->rw_generic_wrapper); +err_free_dispatcher: + lightrec_free_block(state->dispatcher); +err_free_recompiler: + if (ENABLE_THREADED_COMPILER) + lightrec_free_recompiler(state->rec); +err_free_reg_cache: + lightrec_free_regcache(state->reg_cache); +err_free_block_cache: + lightrec_free_block_cache(state->block_cache); +err_free_tinymm: +#if ENABLE_TINYMM + tinymm_shutdown(state->tinymm); +err_free_state: +#endif + lightrec_unregister(MEM_FOR_LIGHTREC, sizeof(*state) + + sizeof(*state->code_lut) * CODE_LUT_SIZE); + free(state); +err_finish_jit: + finish_jit(); + return NULL; +} + +void lightrec_destroy(struct lightrec_state *state) +{ + if (ENABLE_THREADED_COMPILER) + lightrec_free_recompiler(state->rec); + + lightrec_free_regcache(state->reg_cache); + lightrec_free_block_cache(state->block_cache); + lightrec_free_block(state->dispatcher); + lightrec_free_block(state->rw_generic_wrapper); + lightrec_free_block(state->rw_wrapper); + 
lightrec_free_block(state->mfc_wrapper); + lightrec_free_block(state->mtc_wrapper); + lightrec_free_block(state->rfe_wrapper); + lightrec_free_block(state->cp_wrapper); + lightrec_free_block(state->syscall_wrapper); + lightrec_free_block(state->break_wrapper); + finish_jit(); + +#if ENABLE_TINYMM + tinymm_shutdown(state->tinymm); +#endif + lightrec_unregister(MEM_FOR_LIGHTREC, sizeof(*state) + + sizeof(*state->code_lut) * CODE_LUT_SIZE); + free(state); +} + +void lightrec_invalidate(struct lightrec_state *state, u32 addr, u32 len) +{ + u32 kaddr = kunseg(addr & ~0x3); + const struct lightrec_mem_map *map = lightrec_get_map(state, kaddr); + + if (map) { + while (map->mirror_of) + map = map->mirror_of; + + if (map != &state->maps[PSX_MAP_KERNEL_USER_RAM]) + return; + + /* Handle mirrors */ + kaddr &= (state->maps[PSX_MAP_KERNEL_USER_RAM].length - 1); + + for (; len > 4; len -= 4, kaddr += 4) + lightrec_invalidate_map(state, map, kaddr); + + lightrec_invalidate_map(state, map, kaddr); + } +} + +void lightrec_invalidate_all(struct lightrec_state *state) +{ + memset(state->code_lut, 0, sizeof(*state->code_lut) * CODE_LUT_SIZE); +} + +void lightrec_set_invalidate_mode(struct lightrec_state *state, bool dma_only) +{ + if (state->invalidate_from_dma_only != dma_only) + lightrec_invalidate_all(state); + + state->invalidate_from_dma_only = dma_only; +} + +void lightrec_set_exit_flags(struct lightrec_state *state, u32 flags) +{ + if (flags != LIGHTREC_EXIT_NORMAL) { + state->exit_flags |= flags; + state->target_cycle = state->current_cycle; + } +} + +u32 lightrec_exit_flags(struct lightrec_state *state) +{ + return state->exit_flags; +} + +void lightrec_dump_registers(struct lightrec_state *state, u32 regs[34]) +{ + memcpy(regs, state->native_reg_cache, sizeof(state->native_reg_cache)); +} + +void lightrec_restore_registers(struct lightrec_state *state, u32 regs[34]) +{ + memcpy(state->native_reg_cache, regs, sizeof(state->native_reg_cache)); +} + +u32 
lightrec_current_cycle_count(const struct lightrec_state *state) +{ + return state->current_cycle; +} + +void lightrec_reset_cycle_count(struct lightrec_state *state, u32 cycles) +{ + state->current_cycle = cycles; + + if (state->target_cycle < cycles) + state->target_cycle = cycles; +} + +void lightrec_set_target_cycle_count(struct lightrec_state *state, u32 cycles) +{ + if (state->exit_flags == LIGHTREC_EXIT_NORMAL) { + if (cycles < state->current_cycle) + cycles = state->current_cycle; + + state->target_cycle = cycles; + } +} diff --git a/deps/lightrec/lightrec.h b/deps/lightrec/lightrec.h new file mode 100644 index 0000000..d3d896c --- /dev/null +++ b/deps/lightrec/lightrec.h @@ -0,0 +1,147 @@ +/* + * Copyright (C) 2016 Paul Cercueil <paul@crapouillou.net> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ */ + +#ifndef __LIGHTREC_H__ +#define __LIGHTREC_H__ + +#ifdef __cplusplus +#define _Bool bool +extern "C" { +#endif + +#include <stddef.h> +#include <stdint.h> + +#ifdef _WIN32 +# ifdef lightrec_EXPORTS +# define __api __declspec(dllexport) +# elif !defined(LIGHTREC_STATIC) +# define __api __declspec(dllimport) +# else +# define __api +# endif +#elif __GNUC__ >= 4 +# define __api __attribute__((visibility ("default"))) +#else +# define __api +#endif + +typedef uint64_t u64; +typedef uint32_t u32; +typedef uint16_t u16; +typedef uint8_t u8; + +typedef int64_t s64; +typedef int32_t s32; +typedef int16_t s16; +typedef int8_t s8; + +struct lightrec_state; +struct lightrec_mem_map; + +/* Exit flags */ +#define LIGHTREC_EXIT_NORMAL (0) +#define LIGHTREC_EXIT_SYSCALL (1 << 0) +#define LIGHTREC_EXIT_BREAK (1 << 1) +#define LIGHTREC_EXIT_CHECK_INTERRUPT (1 << 2) +#define LIGHTREC_EXIT_SEGFAULT (1 << 3) + +enum psx_map { + PSX_MAP_KERNEL_USER_RAM, + PSX_MAP_BIOS, + PSX_MAP_SCRATCH_PAD, + PSX_MAP_PARALLEL_PORT, + PSX_MAP_HW_REGISTERS, + PSX_MAP_CACHE_CONTROL, + PSX_MAP_MIRROR1, + PSX_MAP_MIRROR2, + PSX_MAP_MIRROR3, +}; + +enum mem_type { + MEM_FOR_CODE, + MEM_FOR_MIPS_CODE, + MEM_FOR_IR, + MEM_FOR_LIGHTREC, + MEM_TYPE_END, +}; + +struct lightrec_mem_map_ops { + void (*sb)(struct lightrec_state *, u32 addr, u8 data); + void (*sh)(struct lightrec_state *, u32 addr, u16 data); + void (*sw)(struct lightrec_state *, u32 addr, u32 data); + u8 (*lb)(struct lightrec_state *, u32 addr); + u16 (*lh)(struct lightrec_state *, u32 addr); + u32 (*lw)(struct lightrec_state *, u32 addr); +}; + +struct lightrec_mem_map { + u32 pc; + u32 length; + void *address; + const struct lightrec_mem_map_ops *ops; + const struct lightrec_mem_map *mirror_of; +}; + +struct lightrec_cop_ops { + u32 (*mfc)(struct lightrec_state *state, u8 reg); + u32 (*cfc)(struct lightrec_state *state, u8 reg); + void (*mtc)(struct lightrec_state *state, u8 reg, u32 value); + void (*ctc)(struct lightrec_state *state, u8 
reg, u32 value); + void (*op)(struct lightrec_state *state, u32 opcode); +}; + +struct lightrec_ops { + struct lightrec_cop_ops cop0_ops; + struct lightrec_cop_ops cop2_ops; +}; + +__api struct lightrec_state *lightrec_init(char *argv0, + const struct lightrec_mem_map *map, + size_t nb, + const struct lightrec_ops *ops); + +__api void lightrec_destroy(struct lightrec_state *state); + +__api u32 lightrec_execute(struct lightrec_state *state, + u32 pc, u32 target_cycle); +__api u32 lightrec_execute_one(struct lightrec_state *state, u32 pc); +__api u32 lightrec_run_interpreter(struct lightrec_state *state, u32 pc); + +__api void lightrec_invalidate(struct lightrec_state *state, u32 addr, u32 len); +__api void lightrec_invalidate_all(struct lightrec_state *state); +__api void lightrec_set_invalidate_mode(struct lightrec_state *state, + _Bool dma_only); + +__api void lightrec_set_exit_flags(struct lightrec_state *state, u32 flags); +__api u32 lightrec_exit_flags(struct lightrec_state *state); + +__api void lightrec_dump_registers(struct lightrec_state *state, u32 regs[34]); +__api void lightrec_restore_registers(struct lightrec_state *state, + u32 regs[34]); + +__api u32 lightrec_current_cycle_count(const struct lightrec_state *state); +__api void lightrec_reset_cycle_count(struct lightrec_state *state, u32 cycles); +__api void lightrec_set_target_cycle_count(struct lightrec_state *state, + u32 cycles); + +__api unsigned int lightrec_get_mem_usage(enum mem_type type); +__api unsigned int lightrec_get_total_mem_usage(void); +__api float lightrec_get_average_ipi(void); + +#ifdef __cplusplus +}; +#endif + +#endif /* __LIGHTREC_H__ */ diff --git a/deps/lightrec/memmanager.c b/deps/lightrec/memmanager.c new file mode 100644 index 0000000..2e6b99b --- /dev/null +++ b/deps/lightrec/memmanager.c @@ -0,0 +1,130 @@ +/* + * Copyright (C) 2019-2020 Paul Cercueil <paul@crapouillou.net> + * + * This library is free software; you can redistribute it and/or + * modify it under the 
terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + */ + +#include "config.h" +#include "lightrec-private.h" +#include "memmanager.h" + +#include <stdlib.h> +#if ENABLE_TINYMM +#include <tinymm.h> +#endif + +#ifdef ENABLE_THREADED_COMPILER +#include <stdatomic.h> + +static atomic_uint lightrec_bytes[MEM_TYPE_END]; + +void lightrec_register(enum mem_type type, unsigned int len) +{ + atomic_fetch_add(&lightrec_bytes[type], len); +} + +void lightrec_unregister(enum mem_type type, unsigned int len) +{ + atomic_fetch_sub(&lightrec_bytes[type], len); +} + +unsigned int lightrec_get_mem_usage(enum mem_type type) +{ + return atomic_load(&lightrec_bytes[type]); +} + +#else /* ENABLE_THREADED_COMPILER */ + +static unsigned int lightrec_bytes[MEM_TYPE_END]; + +void lightrec_register(enum mem_type type, unsigned int len) +{ + lightrec_bytes[type] += len; +} + +void lightrec_unregister(enum mem_type type, unsigned int len) +{ + lightrec_bytes[type] -= len; +} + +unsigned int lightrec_get_mem_usage(enum mem_type type) +{ + return lightrec_bytes[type]; +} +#endif /* ENABLE_THREADED_COMPILER */ + +unsigned int lightrec_get_total_mem_usage(void) +{ + unsigned int i, count; + + for (i = 0, count = 0; i < MEM_TYPE_END; i++) + count += lightrec_get_mem_usage((enum mem_type)i); + + return count; +} + +void * lightrec_malloc(struct lightrec_state *state, + enum mem_type type, unsigned int len) +{ + void *ptr; + +#if ENABLE_TINYMM + if (type == MEM_FOR_IR) + ptr = tinymm_malloc(state->tinymm, len); + else +#endif + ptr = malloc(len); + if (!ptr) + return NULL; + + lightrec_register(type, len); + + return ptr; 
+} + +void * lightrec_calloc(struct lightrec_state *state, + enum mem_type type, unsigned int len) +{ + void *ptr; + +#if ENABLE_TINYMM + if (type == MEM_FOR_IR) + ptr = tinymm_zalloc(state->tinymm, len); + else +#endif + ptr = calloc(1, len); + if (!ptr) + return NULL; + + lightrec_register(type, len); + + return ptr; +} + +void lightrec_free(struct lightrec_state *state, + enum mem_type type, unsigned int len, void *ptr) +{ + lightrec_unregister(type, len); +#if ENABLE_TINYMM + if (type == MEM_FOR_IR) + tinymm_free(state->tinymm, ptr); + else +#endif + free(ptr); +} + +float lightrec_get_average_ipi(void) +{ + unsigned int code_mem = lightrec_get_mem_usage(MEM_FOR_CODE); + unsigned int native_mem = lightrec_get_mem_usage(MEM_FOR_MIPS_CODE); + + return native_mem ? (float)code_mem / (float)native_mem : 0.0f; +} diff --git a/deps/lightrec/memmanager.h b/deps/lightrec/memmanager.h new file mode 100644 index 0000000..956e7c7 --- /dev/null +++ b/deps/lightrec/memmanager.h @@ -0,0 +1,30 @@ +/* + * Copyright (C) 2019 Paul Cercueil <paul@crapouillou.net> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ */ + +#ifndef __MEMMANAGER_H__ +#define __MEMMANAGER_H__ + +#include "lightrec.h" + +void * lightrec_malloc(struct lightrec_state *state, + enum mem_type type, unsigned int len); +void * lightrec_calloc(struct lightrec_state *state, + enum mem_type type, unsigned int len); +void lightrec_free(struct lightrec_state *state, + enum mem_type type, unsigned int len, void *ptr); + +void lightrec_register(enum mem_type type, unsigned int len); +void lightrec_unregister(enum mem_type type, unsigned int len); + +#endif /* __MEMMANAGER_H__ */ diff --git a/deps/lightrec/optimizer.c b/deps/lightrec/optimizer.c new file mode 100644 index 0000000..92b4daa --- /dev/null +++ b/deps/lightrec/optimizer.c @@ -0,0 +1,1021 @@ +/* + * Copyright (C) 2014-2020 Paul Cercueil <paul@crapouillou.net> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ */ + +#include "disassembler.h" +#include "lightrec.h" +#include "memmanager.h" +#include "optimizer.h" +#include "regcache.h" + +#include <errno.h> +#include <stdbool.h> +#include <stdlib.h> + +struct optimizer_list { + void (**optimizers)(struct opcode *); + unsigned int nb_optimizers; +}; + +bool opcode_reads_register(union code op, u8 reg) +{ + switch (op.i.op) { + case OP_SPECIAL: + switch (op.r.op) { + case OP_SPECIAL_SYSCALL: + case OP_SPECIAL_BREAK: + return false; + case OP_SPECIAL_JR: + case OP_SPECIAL_JALR: + case OP_SPECIAL_MTHI: + case OP_SPECIAL_MTLO: + return op.r.rs == reg; + case OP_SPECIAL_MFHI: + return reg == REG_HI; + case OP_SPECIAL_MFLO: + return reg == REG_LO; + case OP_SPECIAL_SLL: + case OP_SPECIAL_SRL: + case OP_SPECIAL_SRA: + return op.r.rt == reg; + default: + return op.r.rs == reg || op.r.rt == reg; + } + case OP_CP0: + switch (op.r.rs) { + case OP_CP0_MTC0: + case OP_CP0_CTC0: + return op.r.rt == reg; + default: + return false; + } + case OP_CP2: + if (op.r.op == OP_CP2_BASIC) { + switch (op.r.rs) { + case OP_CP2_BASIC_MTC2: + case OP_CP2_BASIC_CTC2: + return op.r.rt == reg; + default: + return false; + } + } else { + return false; + } + case OP_J: + case OP_JAL: + case OP_LUI: + return false; + case OP_BEQ: + case OP_BNE: + case OP_LWL: + case OP_LWR: + case OP_SB: + case OP_SH: + case OP_SWL: + case OP_SW: + case OP_SWR: + return op.i.rs == reg || op.i.rt == reg; + default: + return op.i.rs == reg; + } +} + +bool opcode_writes_register(union code op, u8 reg) +{ + switch (op.i.op) { + case OP_SPECIAL: + switch (op.r.op) { + case OP_SPECIAL_JR: + case OP_SPECIAL_JALR: + case OP_SPECIAL_SYSCALL: + case OP_SPECIAL_BREAK: + return false; + case OP_SPECIAL_MULT: + case OP_SPECIAL_MULTU: + case OP_SPECIAL_DIV: + case OP_SPECIAL_DIVU: + return reg == REG_LO || reg == REG_HI; + case OP_SPECIAL_MTHI: + return reg == REG_HI; + case OP_SPECIAL_MTLO: + return reg == REG_LO; + default: + return op.r.rd == reg; + } + case OP_ADDI: + case 
OP_ADDIU: + case OP_SLTI: + case OP_SLTIU: + case OP_ANDI: + case OP_ORI: + case OP_XORI: + case OP_LUI: + case OP_LB: + case OP_LH: + case OP_LWL: + case OP_LW: + case OP_LBU: + case OP_LHU: + case OP_LWR: + return op.i.rt == reg; + case OP_CP0: + switch (op.r.rs) { + case OP_CP0_MFC0: + case OP_CP0_CFC0: + return op.i.rt == reg; + default: + return false; + } + case OP_CP2: + if (op.r.op == OP_CP2_BASIC) { + switch (op.r.rs) { + case OP_CP2_BASIC_MFC2: + case OP_CP2_BASIC_CFC2: + return op.i.rt == reg; + default: + return false; + } + } else { + return false; + } + case OP_META_MOV: + return op.r.rd == reg; + default: + return false; + } +} + +/* TODO: Complete */ +static bool is_nop(union code op) +{ + if (opcode_writes_register(op, 0)) { + switch (op.i.op) { + case OP_CP0: + return op.r.rs != OP_CP0_MFC0; + case OP_LB: + case OP_LH: + case OP_LWL: + case OP_LW: + case OP_LBU: + case OP_LHU: + case OP_LWR: + return false; + default: + return true; + } + } + + switch (op.i.op) { + case OP_SPECIAL: + switch (op.r.op) { + case OP_SPECIAL_AND: + return op.r.rd == op.r.rt && op.r.rd == op.r.rs; + case OP_SPECIAL_ADD: + case OP_SPECIAL_ADDU: + return (op.r.rd == op.r.rt && op.r.rs == 0) || + (op.r.rd == op.r.rs && op.r.rt == 0); + case OP_SPECIAL_SUB: + case OP_SPECIAL_SUBU: + return op.r.rd == op.r.rs && op.r.rt == 0; + case OP_SPECIAL_OR: + if (op.r.rd == op.r.rt) + return op.r.rd == op.r.rs || op.r.rs == 0; + else + return (op.r.rd == op.r.rs) && op.r.rt == 0; + case OP_SPECIAL_SLL: + case OP_SPECIAL_SRA: + case OP_SPECIAL_SRL: + return op.r.rd == op.r.rt && op.r.imm == 0; + default: + return false; + } + case OP_ORI: + case OP_ADDI: + case OP_ADDIU: + return op.i.rt == op.i.rs && op.i.imm == 0; + case OP_BGTZ: + return (op.i.rs == 0 || op.i.imm == 1); + case OP_REGIMM: + return (op.i.op == OP_REGIMM_BLTZ || + op.i.op == OP_REGIMM_BLTZAL) && + (op.i.rs == 0 || op.i.imm == 1); + case OP_BNE: + return (op.i.rs == op.i.rt || op.i.imm == 1); + default: + return false; 
+ } +} + +bool load_in_delay_slot(union code op) +{ + switch (op.i.op) { + case OP_CP0: + switch (op.r.rs) { + case OP_CP0_MFC0: + case OP_CP0_CFC0: + return true; + default: + break; + } + + break; + case OP_CP2: + if (op.r.op == OP_CP2_BASIC) { + switch (op.r.rs) { + case OP_CP2_BASIC_MFC2: + case OP_CP2_BASIC_CFC2: + return true; + default: + break; + } + } + + break; + case OP_LB: + case OP_LH: + case OP_LW: + case OP_LWL: + case OP_LWR: + case OP_LBU: + case OP_LHU: + return true; + default: + break; + } + + return false; +} + +static u32 lightrec_propagate_consts(union code c, u32 known, u32 *v) +{ + switch (c.i.op) { + case OP_SPECIAL: + switch (c.r.op) { + case OP_SPECIAL_SLL: + if (known & BIT(c.r.rt)) { + known |= BIT(c.r.rd); + v[c.r.rd] = v[c.r.rt] << c.r.imm; + } else { + known &= ~BIT(c.r.rd); + } + break; + case OP_SPECIAL_SRL: + if (known & BIT(c.r.rt)) { + known |= BIT(c.r.rd); + v[c.r.rd] = v[c.r.rt] >> c.r.imm; + } else { + known &= ~BIT(c.r.rd); + } + break; + case OP_SPECIAL_SRA: + if (known & BIT(c.r.rt)) { + known |= BIT(c.r.rd); + v[c.r.rd] = (s32)v[c.r.rt] >> c.r.imm; + } else { + known &= ~BIT(c.r.rd); + } + break; + case OP_SPECIAL_SLLV: + if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) { + known |= BIT(c.r.rd); + v[c.r.rd] = v[c.r.rt] << (v[c.r.rs] & 0x1f); + } else { + known &= ~BIT(c.r.rd); + } + break; + case OP_SPECIAL_SRLV: + if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) { + known |= BIT(c.r.rd); + v[c.r.rd] = v[c.r.rt] >> (v[c.r.rs] & 0x1f); + } else { + known &= ~BIT(c.r.rd); + } + break; + case OP_SPECIAL_SRAV: + if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) { + known |= BIT(c.r.rd); + v[c.r.rd] = (s32)v[c.r.rt] + >> (v[c.r.rs] & 0x1f); + } else { + known &= ~BIT(c.r.rd); + } + break; + case OP_SPECIAL_ADD: + case OP_SPECIAL_ADDU: + if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) { + known |= BIT(c.r.rd); + v[c.r.rd] = (s32)v[c.r.rt] + (s32)v[c.r.rs]; + } else { + known &= ~BIT(c.r.rd); + } + break; + case OP_SPECIAL_SUB: + 
case OP_SPECIAL_SUBU: + if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) { + known |= BIT(c.r.rd); + v[c.r.rd] = v[c.r.rt] - v[c.r.rs]; + } else { + known &= ~BIT(c.r.rd); + } + break; + case OP_SPECIAL_AND: + if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) { + known |= BIT(c.r.rd); + v[c.r.rd] = v[c.r.rt] & v[c.r.rs]; + } else { + known &= ~BIT(c.r.rd); + } + break; + case OP_SPECIAL_OR: + if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) { + known |= BIT(c.r.rd); + v[c.r.rd] = v[c.r.rt] | v[c.r.rs]; + } else { + known &= ~BIT(c.r.rd); + } + break; + case OP_SPECIAL_XOR: + if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) { + known |= BIT(c.r.rd); + v[c.r.rd] = v[c.r.rt] ^ v[c.r.rs]; + } else { + known &= ~BIT(c.r.rd); + } + break; + case OP_SPECIAL_NOR: + if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) { + known |= BIT(c.r.rd); + v[c.r.rd] = ~(v[c.r.rt] | v[c.r.rs]); + } else { + known &= ~BIT(c.r.rd); + } + break; + case OP_SPECIAL_SLT: + if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) { + known |= BIT(c.r.rd); + v[c.r.rd] = (s32)v[c.r.rs] < (s32)v[c.r.rt]; + } else { + known &= ~BIT(c.r.rd); + } + break; + case OP_SPECIAL_SLTU: + if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) { + known |= BIT(c.r.rd); + v[c.r.rd] = v[c.r.rs] < v[c.r.rt]; + } else { + known &= ~BIT(c.r.rd); + } + break; + default: + break; + } + break; + case OP_REGIMM: + break; + case OP_ADDI: + case OP_ADDIU: + if (known & BIT(c.i.rs)) { + known |= BIT(c.i.rt); + v[c.i.rt] = v[c.i.rs] + (s32)(s16)c.i.imm; + } else { + known &= ~BIT(c.i.rt); + } + break; + case OP_SLTI: + if (known & BIT(c.i.rs)) { + known |= BIT(c.i.rt); + v[c.i.rt] = (s32)v[c.i.rs] < (s32)(s16)c.i.imm; + } else { + known &= ~BIT(c.i.rt); + } + break; + case OP_SLTIU: + if (known & BIT(c.i.rs)) { + known |= BIT(c.i.rt); + v[c.i.rt] = v[c.i.rs] < (u32)(s32)(s16)c.i.imm; + } else { + known &= ~BIT(c.i.rt); + } + break; + case OP_ANDI: + if (known & BIT(c.i.rs)) { + known |= BIT(c.i.rt); + v[c.i.rt] = v[c.i.rs] & c.i.imm; + } else { 
+ known &= ~BIT(c.i.rt); + } + break; + case OP_ORI: + if (known & BIT(c.i.rs)) { + known |= BIT(c.i.rt); + v[c.i.rt] = v[c.i.rs] | c.i.imm; + } else { + known &= ~BIT(c.i.rt); + } + break; + case OP_XORI: + if (known & BIT(c.i.rs)) { + known |= BIT(c.i.rt); + v[c.i.rt] = v[c.i.rs] ^ c.i.imm; + } else { + known &= ~BIT(c.i.rt); + } + break; + case OP_LUI: + known |= BIT(c.i.rt); + v[c.i.rt] = c.i.imm << 16; + break; + case OP_CP0: + switch (c.r.rs) { + case OP_CP0_MFC0: + case OP_CP0_CFC0: + known &= ~BIT(c.r.rt); + break; + } + break; + case OP_CP2: + if (c.r.op == OP_CP2_BASIC) { + switch (c.r.rs) { + case OP_CP2_BASIC_MFC2: + case OP_CP2_BASIC_CFC2: + known &= ~BIT(c.r.rt); + break; + } + } + break; + case OP_LB: + case OP_LH: + case OP_LWL: + case OP_LW: + case OP_LBU: + case OP_LHU: + case OP_LWR: + case OP_LWC2: + known &= ~BIT(c.i.rt); + break; + case OP_META_MOV: + if (known & BIT(c.r.rs)) { + known |= BIT(c.r.rd); + v[c.r.rd] = v[c.r.rs]; + } else { + known &= ~BIT(c.r.rd); + } + break; + default: + break; + } + + return known; +} + +static int lightrec_add_meta(struct block *block, + struct opcode *op, union code code) +{ + struct opcode *meta; + + meta = lightrec_malloc(block->state, MEM_FOR_IR, sizeof(*meta)); + if (!meta) + return -ENOMEM; + + meta->c = code; + meta->flags = 0; + + if (op) { + meta->offset = op->offset; + meta->next = op->next; + op->next = meta; + } else { + meta->offset = 0; + meta->next = block->opcode_list; + block->opcode_list = meta; + } + + return 0; +} + +static int lightrec_add_sync(struct block *block, struct opcode *prev) +{ + return lightrec_add_meta(block, prev, (union code){ + .j.op = OP_META_SYNC, + }); +} + +static int lightrec_transform_ops(struct block *block) +{ + struct opcode *list = block->opcode_list; + + for (; list; list = list->next) { + + /* Transform all opcodes detected as useless to real NOPs + * (0x0: SLL r0, r0, #0) */ + if (list->opcode != 0 && is_nop(list->c)) { + pr_debug("Converting useless opcode 
0x%08x to NOP\n", + list->opcode); + list->opcode = 0x0; + } + + if (!list->opcode) + continue; + + switch (list->i.op) { + /* Transform BEQ / BNE to BEQZ / BNEZ meta-opcodes if one of the + * two registers is zero. */ + case OP_BEQ: + if ((list->i.rs == 0) ^ (list->i.rt == 0)) { + list->i.op = OP_META_BEQZ; + if (list->i.rs == 0) { + list->i.rs = list->i.rt; + list->i.rt = 0; + } + } else if (list->i.rs == list->i.rt) { + list->i.rs = 0; + list->i.rt = 0; + } + break; + case OP_BNE: + if (list->i.rs == 0) { + list->i.op = OP_META_BNEZ; + list->i.rs = list->i.rt; + list->i.rt = 0; + } else if (list->i.rt == 0) { + list->i.op = OP_META_BNEZ; + } + break; + + /* Transform ORI/ADDI/ADDIU with imm #0 or ORR/ADD/ADDU/SUB/SUBU + * with register $zero to the MOV meta-opcode */ + case OP_ORI: + case OP_ADDI: + case OP_ADDIU: + if (list->i.imm == 0) { + pr_debug("Convert ORI/ADDI/ADDIU #0 to MOV\n"); + list->i.op = OP_META_MOV; + list->r.rd = list->i.rt; + } + break; + case OP_SPECIAL: + switch (list->r.op) { + case OP_SPECIAL_SLL: + case OP_SPECIAL_SRA: + case OP_SPECIAL_SRL: + if (list->r.imm == 0) { + pr_debug("Convert SLL/SRL/SRA #0 to MOV\n"); + list->i.op = OP_META_MOV; + list->r.rs = list->r.rt; + } + break; + case OP_SPECIAL_OR: + case OP_SPECIAL_ADD: + case OP_SPECIAL_ADDU: + if (list->r.rs == 0) { + pr_debug("Convert OR/ADD $zero to MOV\n"); + list->i.op = OP_META_MOV; + list->r.rs = list->r.rt; + } + case OP_SPECIAL_SUB: /* fall-through */ + case OP_SPECIAL_SUBU: + if (list->r.rt == 0) { + pr_debug("Convert OR/ADD/SUB $zero to MOV\n"); + list->i.op = OP_META_MOV; + } + default: /* fall-through */ + break; + } + default: /* fall-through */ + break; + } + } + + return 0; +} + +static int lightrec_switch_delay_slots(struct block *block) +{ + struct opcode *list, *prev; + u8 flags; + + for (list = block->opcode_list, prev = NULL; list->next; + prev = list, list = list->next) { + union code op = list->c; + union code next_op = list->next->c; + + if 
(!has_delay_slot(op) || + list->flags & (LIGHTREC_NO_DS | LIGHTREC_EMULATE_BRANCH) || + op.opcode == 0) + continue; + + if (prev && has_delay_slot(prev->c)) + continue; + + switch (list->i.op) { + case OP_SPECIAL: + switch (op.r.op) { + case OP_SPECIAL_JALR: + if (opcode_reads_register(next_op, op.r.rd) || + opcode_writes_register(next_op, op.r.rd)) + continue; + case OP_SPECIAL_JR: /* fall-through */ + if (opcode_writes_register(next_op, op.r.rs)) + continue; + default: /* fall-through */ + break; + } + case OP_J: /* fall-through */ + break; + case OP_JAL: + if (opcode_reads_register(next_op, 31) || + opcode_writes_register(next_op, 31)) + continue; + else + break; + case OP_BEQ: + case OP_BNE: + if (op.i.rt && opcode_writes_register(next_op, op.i.rt)) + continue; + case OP_BLEZ: /* fall-through */ + case OP_BGTZ: + case OP_META_BEQZ: + case OP_META_BNEZ: + if (op.i.rs && opcode_writes_register(next_op, op.i.rs)) + continue; + break; + case OP_REGIMM: + switch (op.r.rt) { + case OP_REGIMM_BLTZAL: + case OP_REGIMM_BGEZAL: + if (opcode_reads_register(next_op, 31) || + opcode_writes_register(next_op, 31)) + continue; + case OP_REGIMM_BLTZ: /* fall-through */ + case OP_REGIMM_BGEZ: + if (op.i.rs && + opcode_writes_register(next_op, op.i.rs)) + continue; + break; + } + default: /* fall-through */ + break; + } + + pr_debug("Swap branch and delay slot opcodes " + "at offsets 0x%x / 0x%x\n", list->offset << 2, + list->next->offset << 2); + + flags = list->next->flags; + list->c = next_op; + list->next->c = op; + list->next->flags = list->flags | LIGHTREC_NO_DS; + list->flags = flags; + list->offset++; + list->next->offset--; + } + + return 0; +} + +static int lightrec_detect_impossible_branches(struct block *block) +{ + struct opcode *op, *next; + + for (op = block->opcode_list, next = op->next; next; + op = next, next = op->next) { + if (!has_delay_slot(op->c) || + (!load_in_delay_slot(next->c) && + !has_delay_slot(next->c) && + !(next->i.op == OP_CP0 && next->r.rs == 
OP_CP0_RFE))) + continue; + + if (op->c.opcode == next->c.opcode) { + /* The delay slot is the exact same opcode as the branch + * opcode: this is effectively a NOP */ + next->c.opcode = 0; + continue; + } + + if (op == block->opcode_list) { + /* If the first opcode is an 'impossible' branch, we + * only keep the first two opcodes of the block (the + * branch itself + its delay slot) */ + lightrec_free_opcode_list(block->state, next->next); + next->next = NULL; + block->nb_ops = 2; + } + + op->flags |= LIGHTREC_EMULATE_BRANCH; + } + + return 0; +} + +static int lightrec_local_branches(struct block *block) +{ + struct opcode *list, *target, *prev; + s32 offset; + int ret; + + for (list = block->opcode_list; list; list = list->next) { + if (list->flags & LIGHTREC_EMULATE_BRANCH) + continue; + + switch (list->i.op) { + case OP_BEQ: + case OP_BNE: + case OP_BLEZ: + case OP_BGTZ: + case OP_REGIMM: + case OP_META_BEQZ: + case OP_META_BNEZ: + offset = list->offset + 1 + (s16)list->i.imm; + if (offset >= 0 && offset < block->nb_ops) + break; + default: /* fall-through */ + continue; + } + + pr_debug("Found local branch to offset 0x%x\n", offset << 2); + + for (target = block->opcode_list, prev = NULL; + target; prev = target, target = target->next) { + if (target->offset != offset || + target->j.op == OP_META_SYNC) + continue; + + if (target->flags & LIGHTREC_EMULATE_BRANCH) { + pr_debug("Branch target must be emulated" + " - skip\n"); + break; + } + + if (prev && has_delay_slot(prev->c)) { + pr_debug("Branch target is a delay slot" + " - skip\n"); + break; + } + + if (prev && prev->j.op != OP_META_SYNC) { + pr_debug("Adding sync before offset " + "0x%x\n", offset << 2); + ret = lightrec_add_sync(block, prev); + if (ret) + return ret; + + prev->next->offset = target->offset; + } + + list->flags |= LIGHTREC_LOCAL_BRANCH; + break; + } + } + + return 0; +} + +bool has_delay_slot(union code op) +{ + switch (op.i.op) { + case OP_SPECIAL: + switch (op.r.op) { + case 
OP_SPECIAL_JR: + case OP_SPECIAL_JALR: + return true; + default: + return false; + } + case OP_J: + case OP_JAL: + case OP_BEQ: + case OP_BNE: + case OP_BLEZ: + case OP_BGTZ: + case OP_REGIMM: + case OP_META_BEQZ: + case OP_META_BNEZ: + return true; + default: + return false; + } +} + +static int lightrec_add_unload(struct block *block, struct opcode *op, u8 reg) +{ + return lightrec_add_meta(block, op, (union code){ + .i.op = OP_META_REG_UNLOAD, + .i.rs = reg, + }); +} + +static int lightrec_early_unload(struct block *block) +{ + struct opcode *list = block->opcode_list; + u8 i; + + for (i = 1; i < 34; i++) { + struct opcode *op, *last_r = NULL, *last_w = NULL; + unsigned int last_r_id = 0, last_w_id = 0, id = 0; + int ret; + + for (op = list; op->next; op = op->next, id++) { + if (opcode_reads_register(op->c, i)) { + last_r = op; + last_r_id = id; + } + + if (opcode_writes_register(op->c, i)) { + last_w = op; + last_w_id = id; + } + } + + if (last_w_id > last_r_id) { + if (has_delay_slot(last_w->c) && + !(last_w->flags & LIGHTREC_NO_DS)) + last_w = last_w->next; + + if (last_w->next) { + ret = lightrec_add_unload(block, last_w, i); + if (ret) + return ret; + } + } else if (last_r) { + if (has_delay_slot(last_r->c) && + !(last_r->flags & LIGHTREC_NO_DS)) + last_r = last_r->next; + + if (last_r->next) { + ret = lightrec_add_unload(block, last_r, i); + if (ret) + return ret; + } + } + } + + return 0; +} + +static int lightrec_flag_stores(struct block *block) +{ + struct opcode *list; + u32 known = BIT(0); + u32 values[32] = { 0 }; + + for (list = block->opcode_list; list; list = list->next) { + /* Register $zero is always, well, zero */ + known |= BIT(0); + values[0] = 0; + + switch (list->i.op) { + case OP_SB: + case OP_SH: + case OP_SW: + /* Mark all store operations that target $sp, $gp, $k0 + * or $k1 as not requiring code invalidation. This is + * based on the heuristic that stores using one of these + * registers as address will never hit a code page. 
*/ + if (list->i.rs >= 26 && list->i.rs <= 29) { + pr_debug("Flaging opcode 0x%08x as not requiring invalidation\n", + list->opcode); + list->flags |= LIGHTREC_NO_INVALIDATE; + } + + /* Detect writes whose destination address is inside the + * current block, using constant propagation. When these + * occur, we mark the blocks as not compilable. */ + if ((known & BIT(list->i.rs)) && + kunseg(values[list->i.rs]) >= kunseg(block->pc) && + kunseg(values[list->i.rs]) < (kunseg(block->pc) + + block->nb_ops * 4)) { + pr_debug("Self-modifying block detected\n"); + block->flags |= BLOCK_NEVER_COMPILE; + list->flags |= LIGHTREC_SMC; + } + default: /* fall-through */ + break; + } + + known = lightrec_propagate_consts(list->c, known, values); + } + + return 0; +} + +static bool is_mult32(const struct block *block, const struct opcode *op) +{ + const struct opcode *next, *last = NULL; + u32 offset; + + for (op = op->next; op != last; op = op->next) { + switch (op->i.op) { + case OP_BEQ: + case OP_BNE: + case OP_BLEZ: + case OP_BGTZ: + case OP_REGIMM: + case OP_META_BEQZ: + case OP_META_BNEZ: + /* TODO: handle backwards branches too */ + if ((op->flags & LIGHTREC_LOCAL_BRANCH) && + (s16)op->c.i.imm >= 0) { + offset = op->offset + 1 + (s16)op->c.i.imm; + + for (next = op; next->offset != offset; + next = next->next); + + if (!is_mult32(block, next)) + return false; + + last = next; + continue; + } else { + return false; + } + case OP_SPECIAL: + switch (op->r.op) { + case OP_SPECIAL_MULT: + case OP_SPECIAL_MULTU: + case OP_SPECIAL_DIV: + case OP_SPECIAL_DIVU: + case OP_SPECIAL_MTHI: + return true; + case OP_SPECIAL_JR: + return op->r.rs == 31 && + ((op->flags & LIGHTREC_NO_DS) || + !(op->next->i.op == OP_SPECIAL && + op->next->r.op == OP_SPECIAL_MFHI)); + case OP_SPECIAL_JALR: + case OP_SPECIAL_MFHI: + return false; + default: + continue; + } + default: + continue; + } + } + + return last != NULL; +} + +static int lightrec_flag_mults(struct block *block) +{ + struct opcode *list, 
*prev; + + for (list = block->opcode_list, prev = NULL; list; + prev = list, list = list->next) { + if (list->i.op != OP_SPECIAL) + continue; + + switch (list->r.op) { + case OP_SPECIAL_MULT: + case OP_SPECIAL_MULTU: + break; + default: + continue; + } + + /* Don't support MULT(U) opcodes in delay slots */ + if (prev && has_delay_slot(prev->c)) + continue; + + if (is_mult32(block, list)) { + pr_debug("Mark MULT(U) opcode at offset 0x%x as" + " 32-bit\n", list->offset << 2); + list->flags |= LIGHTREC_MULT32; + } + } + + return 0; +} + +static int (*lightrec_optimizers[])(struct block *) = { + &lightrec_detect_impossible_branches, + &lightrec_transform_ops, + &lightrec_local_branches, + &lightrec_switch_delay_slots, + &lightrec_flag_stores, + &lightrec_flag_mults, + &lightrec_early_unload, +}; + +int lightrec_optimize(struct block *block) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(lightrec_optimizers); i++) { + int ret = lightrec_optimizers[i](block); + + if (ret) + return ret; + } + + return 0; +} diff --git a/deps/lightrec/optimizer.h b/deps/lightrec/optimizer.h new file mode 100644 index 0000000..d8def69 --- /dev/null +++ b/deps/lightrec/optimizer.h @@ -0,0 +1,29 @@ +/* + * Copyright (C) 2014 Paul Cercueil <paul@crapouillou.net> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ */ + +#ifndef __OPTIMIZER_H__ +#define __OPTIMIZER_H__ + +#include "disassembler.h" + +struct block; + +_Bool opcode_reads_register(union code op, u8 reg); +_Bool opcode_writes_register(union code op, u8 reg); +_Bool has_delay_slot(union code op); +_Bool load_in_delay_slot(union code op); + +int lightrec_optimize(struct block *block); + +#endif /* __OPTIMIZER_H__ */ diff --git a/deps/lightrec/recompiler.c b/deps/lightrec/recompiler.c new file mode 100644 index 0000000..379881a --- /dev/null +++ b/deps/lightrec/recompiler.c @@ -0,0 +1,289 @@ +/* + * Copyright (C) 2019-2020 Paul Cercueil <paul@crapouillou.net> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ */ + +#include "debug.h" +#include "interpreter.h" +#include "lightrec-private.h" +#include "memmanager.h" + +#include <errno.h> +#include <stdatomic.h> +#include <stdbool.h> +#include <stdlib.h> +#include <pthread.h> + +struct block_rec { + struct block *block; + struct block_rec *next; +}; + +struct recompiler { + struct lightrec_state *state; + pthread_t thd; + pthread_cond_t cond; + pthread_mutex_t mutex; + bool stop; + struct block *current_block; + struct block_rec *list; +}; + +static void slist_remove(struct recompiler *rec, struct block_rec *elm) +{ + struct block_rec *prev; + + if (rec->list == elm) { + rec->list = elm->next; + } else { + for (prev = rec->list; prev && prev->next != elm; ) + prev = prev->next; + if (prev) + prev->next = elm->next; + } +} + +static void lightrec_compile_list(struct recompiler *rec) +{ + struct block_rec *next; + struct block *block; + int ret; + + while (!!(next = rec->list)) { + block = next->block; + rec->current_block = block; + + pthread_mutex_unlock(&rec->mutex); + + ret = lightrec_compile_block(block); + if (ret) { + pr_err("Unable to compile block at PC 0x%x: %d\n", + block->pc, ret); + } + + pthread_mutex_lock(&rec->mutex); + + slist_remove(rec, next); + lightrec_free(rec->state, MEM_FOR_LIGHTREC, + sizeof(*next), next); + pthread_cond_signal(&rec->cond); + } + + rec->current_block = NULL; +} + +static void * lightrec_recompiler_thd(void *d) +{ + struct recompiler *rec = d; + + pthread_mutex_lock(&rec->mutex); + + for (;;) { + do { + pthread_cond_wait(&rec->cond, &rec->mutex); + + if (rec->stop) { + pthread_mutex_unlock(&rec->mutex); + return NULL; + } + + } while (!rec->list); + + lightrec_compile_list(rec); + } +} + +struct recompiler *lightrec_recompiler_init(struct lightrec_state *state) +{ + struct recompiler *rec; + int ret; + + rec = lightrec_malloc(state, MEM_FOR_LIGHTREC, sizeof(*rec)); + if (!rec) { + pr_err("Cannot create recompiler: Out of memory\n"); + return NULL; + } + + rec->state = state; + 
rec->stop = false; + rec->current_block = NULL; + rec->list = NULL; + + ret = pthread_cond_init(&rec->cond, NULL); + if (ret) { + pr_err("Cannot init cond variable: %d\n", ret); + goto err_free_rec; + } + + ret = pthread_mutex_init(&rec->mutex, NULL); + if (ret) { + pr_err("Cannot init mutex variable: %d\n", ret); + goto err_cnd_destroy; + } + + ret = pthread_create(&rec->thd, NULL, lightrec_recompiler_thd, rec); + if (ret) { + pr_err("Cannot create recompiler thread: %d\n", ret); + goto err_mtx_destroy; + } + + return rec; + +err_mtx_destroy: + pthread_mutex_destroy(&rec->mutex); +err_cnd_destroy: + pthread_cond_destroy(&rec->cond); +err_free_rec: + lightrec_free(state, MEM_FOR_LIGHTREC, sizeof(*rec), rec); + return NULL; +} + +void lightrec_free_recompiler(struct recompiler *rec) +{ + rec->stop = true; + + /* Stop the thread */ + pthread_mutex_lock(&rec->mutex); + pthread_cond_signal(&rec->cond); + pthread_mutex_unlock(&rec->mutex); + pthread_join(rec->thd, NULL); + + pthread_mutex_destroy(&rec->mutex); + pthread_cond_destroy(&rec->cond); + lightrec_free(rec->state, MEM_FOR_LIGHTREC, sizeof(*rec), rec); +} + +int lightrec_recompiler_add(struct recompiler *rec, struct block *block) +{ + struct block_rec *block_rec, *prev; + + pthread_mutex_lock(&rec->mutex); + + for (block_rec = rec->list, prev = NULL; block_rec; + prev = block_rec, block_rec = block_rec->next) { + if (block_rec->block == block) { + /* The block to compile is already in the queue - bump + * it to the top of the list */ + if (prev) { + prev->next = block_rec->next; + block_rec->next = rec->list; + rec->list = block_rec; + } + + pthread_mutex_unlock(&rec->mutex); + return 0; + } + } + + /* By the time this function was called, the block has been recompiled + * and ins't in the wait list anymore. Just return here. 
*/ + if (block->function) { + pthread_mutex_unlock(&rec->mutex); + return 0; + } + + block_rec = lightrec_malloc(rec->state, MEM_FOR_LIGHTREC, + sizeof(*block_rec)); + if (!block_rec) { + pthread_mutex_unlock(&rec->mutex); + return -ENOMEM; + } + + pr_debug("Adding block PC 0x%x to recompiler\n", block->pc); + + block_rec->block = block; + block_rec->next = rec->list; + rec->list = block_rec; + + /* Signal the thread */ + pthread_cond_signal(&rec->cond); + pthread_mutex_unlock(&rec->mutex); + + return 0; +} + +void lightrec_recompiler_remove(struct recompiler *rec, struct block *block) +{ + struct block_rec *block_rec; + + pthread_mutex_lock(&rec->mutex); + + for (block_rec = rec->list; block_rec; block_rec = block_rec->next) { + if (block_rec->block == block) { + if (block == rec->current_block) { + /* Block is being recompiled - wait for + * completion */ + do { + pthread_cond_wait(&rec->cond, + &rec->mutex); + } while (block == rec->current_block); + } else { + /* Block is not yet being processed - remove it + * from the list */ + slist_remove(rec, block_rec); + lightrec_free(rec->state, MEM_FOR_LIGHTREC, + sizeof(*block_rec), block_rec); + } + + break; + } + } + + pthread_mutex_unlock(&rec->mutex); +} + +void * lightrec_recompiler_run_first_pass(struct block *block, u32 *pc) +{ + bool freed; + + if (likely(block->function)) { + if (block->flags & BLOCK_FULLY_TAGGED) { + freed = atomic_flag_test_and_set(&block->op_list_freed); + + if (!freed) { + pr_debug("Block PC 0x%08x is fully tagged" + " - free opcode list\n", block->pc); + + /* The block was already compiled but the opcode list + * didn't get freed yet - do it now */ + lightrec_free_opcode_list(block->state, + block->opcode_list); + block->opcode_list = NULL; + } + } + + return block->function; + } + + /* Mark the opcode list as freed, so that the threaded compiler won't + * free it while we're using it in the interpreter. 
*/ + freed = atomic_flag_test_and_set(&block->op_list_freed); + + /* Block wasn't compiled yet - run the interpreter */ + *pc = lightrec_emulate_block(block, *pc); + + if (!freed) + atomic_flag_clear(&block->op_list_freed); + + /* The block got compiled while the interpreter was running. + * We can free the opcode list now. */ + if (block->function && (block->flags & BLOCK_FULLY_TAGGED) && + !atomic_flag_test_and_set(&block->op_list_freed)) { + pr_debug("Block PC 0x%08x is fully tagged" + " - free opcode list\n", block->pc); + + lightrec_free_opcode_list(block->state, block->opcode_list); + block->opcode_list = NULL; + } + + return NULL; +} diff --git a/deps/lightrec/recompiler.h b/deps/lightrec/recompiler.h new file mode 100644 index 0000000..99e82aa --- /dev/null +++ b/deps/lightrec/recompiler.h @@ -0,0 +1,29 @@ +/* + * Copyright (C) 2019 Paul Cercueil <paul@crapouillou.net> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ */ + +#ifndef __LIGHTREC_RECOMPILER_H__ +#define __LIGHTREC_RECOMPILER_H__ + +struct block; +struct lightrec_state; +struct recompiler; + +struct recompiler *lightrec_recompiler_init(struct lightrec_state *state); +void lightrec_free_recompiler(struct recompiler *rec); +int lightrec_recompiler_add(struct recompiler *rec, struct block *block); +void lightrec_recompiler_remove(struct recompiler *rec, struct block *block); + +void * lightrec_recompiler_run_first_pass(struct block *block, u32 *pc); + +#endif /* __LIGHTREC_RECOMPILER_H__ */ diff --git a/deps/lightrec/regcache.c b/deps/lightrec/regcache.c new file mode 100644 index 0000000..0256015 --- /dev/null +++ b/deps/lightrec/regcache.c @@ -0,0 +1,498 @@ +/* + * Copyright (C) 2014-2020 Paul Cercueil <paul@crapouillou.net> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ */ + +#include "debug.h" +#include "memmanager.h" +#include "regcache.h" + +#include <lightning.h> +#include <stdbool.h> +#include <stddef.h> + +struct native_register { + bool used, loaded, dirty, output, extend, extended, locked; + s8 emulated_register; +}; + +struct regcache { + struct lightrec_state *state; + struct native_register lightrec_regs[NUM_REGS + NUM_TEMPS]; +}; + +static const char * mips_regs[] = { + "zero", + "at", + "v0", "v1", + "a0", "a1", "a2", "a3", + "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", + "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", + "t8", "t9", + "k0", "k1", + "gp", "sp", "fp", "ra", + "lo", "hi", +}; + +const char * lightrec_reg_name(u8 reg) +{ + return mips_regs[reg]; +} + +static inline u8 lightrec_reg_number(const struct regcache *cache, + const struct native_register *nreg) +{ + return (u8) (((uintptr_t) nreg - (uintptr_t) cache->lightrec_regs) + / sizeof(*nreg)); +} + +static inline u8 lightrec_reg_to_lightning(const struct regcache *cache, + const struct native_register *nreg) +{ + u8 offset = lightrec_reg_number(cache, nreg); + return offset < NUM_REGS ? JIT_V(offset) : JIT_R(offset - NUM_REGS); +} + +static inline struct native_register * lightning_reg_to_lightrec( + struct regcache *cache, u8 reg) +{ + if ((JIT_V0 > JIT_R0 && reg >= JIT_V0) || + (JIT_V0 < JIT_R0 && reg < JIT_R0)) { + if (JIT_V1 > JIT_V0) + return &cache->lightrec_regs[reg - JIT_V0]; + else + return &cache->lightrec_regs[JIT_V0 - reg]; + } else { + if (JIT_R1 > JIT_R0) + return &cache->lightrec_regs[NUM_REGS + reg - JIT_R0]; + else + return &cache->lightrec_regs[NUM_REGS + JIT_R0 - reg]; + } +} + +static struct native_register * alloc_temp(struct regcache *cache) +{ + unsigned int i; + + /* We search the register list in reverse order. As temporaries are + * meant to be used only in the emitter functions, they can be mapped to + * caller-saved registers, as they won't have to be saved back to + * memory. 
*/ + for (i = ARRAY_SIZE(cache->lightrec_regs); i; i--) { + struct native_register *nreg = &cache->lightrec_regs[i - 1]; + if (!nreg->used && !nreg->loaded && !nreg->dirty) + return nreg; + } + + for (i = ARRAY_SIZE(cache->lightrec_regs); i; i--) { + struct native_register *nreg = &cache->lightrec_regs[i - 1]; + if (!nreg->used) + return nreg; + } + + return NULL; +} + +static struct native_register * find_mapped_reg(struct regcache *cache, + u8 reg, bool out) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(cache->lightrec_regs); i++) { + struct native_register *nreg = &cache->lightrec_regs[i]; + if ((!reg || nreg->loaded || nreg->dirty) && + nreg->emulated_register == reg && + (!out || !nreg->locked)) + return nreg; + } + + return NULL; +} + +static struct native_register * alloc_in_out(struct regcache *cache, + u8 reg, bool out) +{ + struct native_register *nreg; + unsigned int i; + + /* Try to find if the register is already mapped somewhere */ + nreg = find_mapped_reg(cache, reg, out); + if (nreg) + return nreg; + + /* Try to allocate a non-dirty, non-loaded register. + * Loaded registers may be re-used later, so it's better to avoid + * re-using one if possible. 
*/ + for (i = 0; i < ARRAY_SIZE(cache->lightrec_regs); i++) { + nreg = &cache->lightrec_regs[i]; + if (!nreg->used && !nreg->dirty && !nreg->loaded) + return nreg; + } + + /* Try to allocate a non-dirty register */ + for (i = 0; i < ARRAY_SIZE(cache->lightrec_regs); i++) { + nreg = &cache->lightrec_regs[i]; + if (!nreg->used && !nreg->dirty) + return nreg; + } + + for (i = 0; i < ARRAY_SIZE(cache->lightrec_regs); i++) { + nreg = &cache->lightrec_regs[i]; + if (!nreg->used) + return nreg; + } + + return NULL; +} + +static void lightrec_discard_nreg(struct native_register *nreg) +{ + nreg->extended = false; + nreg->loaded = false; + nreg->output = false; + nreg->dirty = false; + nreg->used = false; + nreg->locked = false; + nreg->emulated_register = -1; +} + +static void lightrec_unload_nreg(struct regcache *cache, jit_state_t *_jit, + struct native_register *nreg, u8 jit_reg) +{ + /* If we get a dirty register, store back the old value */ + if (nreg->dirty) { + s16 offset = offsetof(struct lightrec_state, native_reg_cache) + + (nreg->emulated_register << 2); + + jit_stxi_i(offset, LIGHTREC_REG_STATE, jit_reg); + } + + lightrec_discard_nreg(nreg); +} + +void lightrec_unload_reg(struct regcache *cache, jit_state_t *_jit, u8 jit_reg) +{ + lightrec_unload_nreg(cache, _jit, + lightning_reg_to_lightrec(cache, jit_reg), jit_reg); +} + +/* lightrec_lock_reg: the register will be cleaned if dirty, then locked. + * A locked register cannot only be used as input, not output. 
*/ +void lightrec_lock_reg(struct regcache *cache, jit_state_t *_jit, u8 jit_reg) +{ + struct native_register *reg = lightning_reg_to_lightrec(cache, jit_reg); + + lightrec_clean_reg(cache, _jit, jit_reg); + + reg->locked = true; +} + +u8 lightrec_alloc_reg(struct regcache *cache, jit_state_t *_jit, u8 jit_reg) +{ + struct native_register *reg = lightning_reg_to_lightrec(cache, jit_reg); + + lightrec_unload_nreg(cache, _jit, reg, jit_reg); + + reg->used = true; + return jit_reg; +} + +u8 lightrec_alloc_reg_temp(struct regcache *cache, jit_state_t *_jit) +{ + u8 jit_reg; + struct native_register *nreg = alloc_temp(cache); + if (!nreg) { + /* No free register, no dirty register to free. */ + pr_err("No more registers! Abandon ship!\n"); + return 0; + } + + jit_reg = lightrec_reg_to_lightning(cache, nreg); + lightrec_unload_nreg(cache, _jit, nreg, jit_reg); + + nreg->used = true; + return jit_reg; +} + +u8 lightrec_alloc_reg_out(struct regcache *cache, jit_state_t *_jit, u8 reg) +{ + u8 jit_reg; + struct native_register *nreg = alloc_in_out(cache, reg, true); + if (!nreg) { + /* No free register, no dirty register to free. */ + pr_err("No more registers! Abandon ship!\n"); + return 0; + } + + jit_reg = lightrec_reg_to_lightning(cache, nreg); + + /* If we get a dirty register that doesn't correspond to the one + * we're requesting, store back the old value */ + if (nreg->emulated_register != reg) + lightrec_unload_nreg(cache, _jit, nreg, jit_reg); + + nreg->extend = false; + nreg->used = true; + nreg->output = true; + nreg->emulated_register = reg; + return jit_reg; +} + +u8 lightrec_alloc_reg_in(struct regcache *cache, jit_state_t *_jit, u8 reg) +{ + u8 jit_reg; + bool reg_changed; + struct native_register *nreg = alloc_in_out(cache, reg, false); + if (!nreg) { + /* No free register, no dirty register to free. */ + pr_err("No more registers! 
Abandon ship!\n"); + return 0; + } + + jit_reg = lightrec_reg_to_lightning(cache, nreg); + + /* If we get a dirty register that doesn't correspond to the one + * we're requesting, store back the old value */ + reg_changed = nreg->emulated_register != reg; + if (reg_changed) + lightrec_unload_nreg(cache, _jit, nreg, jit_reg); + + if (!nreg->loaded && !nreg->dirty && reg != 0) { + s16 offset = offsetof(struct lightrec_state, native_reg_cache) + + (reg << 2); + + /* Load previous value from register cache */ + jit_ldxi_i(jit_reg, LIGHTREC_REG_STATE, offset); + nreg->loaded = true; + nreg->extended = true; + } + + /* Clear register r0 before use */ + if (reg == 0 && (!nreg->loaded || nreg->dirty)) { + jit_movi(jit_reg, 0); + nreg->extended = true; + nreg->loaded = true; + } + + nreg->used = true; + nreg->output = false; + nreg->emulated_register = reg; + return jit_reg; +} + +u8 lightrec_alloc_reg_out_ext(struct regcache *cache, jit_state_t *_jit, u8 reg) +{ + struct native_register *nreg; + u8 jit_reg; + + jit_reg = lightrec_alloc_reg_out(cache, _jit, reg); + nreg = lightning_reg_to_lightrec(cache, jit_reg); + + nreg->extend = true; + + return jit_reg; +} + +u8 lightrec_alloc_reg_in_ext(struct regcache *cache, jit_state_t *_jit, u8 reg) +{ + struct native_register *nreg; + u8 jit_reg; + + jit_reg = lightrec_alloc_reg_in(cache, _jit, reg); + nreg = lightning_reg_to_lightrec(cache, jit_reg); + +#if __WORDSIZE == 64 + if (!nreg->extended) { + nreg->extended = true; + jit_extr_i(jit_reg, jit_reg); + } +#endif + + return jit_reg; +} + +u8 lightrec_request_reg_in(struct regcache *cache, jit_state_t *_jit, + u8 reg, u8 jit_reg) +{ + struct native_register *nreg; + u16 offset; + + nreg = find_mapped_reg(cache, reg, false); + if (nreg) { + jit_reg = lightrec_reg_to_lightning(cache, nreg); + nreg->used = true; + return jit_reg; + } + + nreg = lightning_reg_to_lightrec(cache, jit_reg); + lightrec_unload_nreg(cache, _jit, nreg, jit_reg); + + /* Load previous value from register 
cache */ + offset = offsetof(struct lightrec_state, native_reg_cache) + (reg << 2); + jit_ldxi_i(jit_reg, LIGHTREC_REG_STATE, offset); + + nreg->extended = true; + nreg->used = true; + nreg->loaded = true; + nreg->emulated_register = reg; + + return jit_reg; +} + +static void free_reg(struct native_register *nreg) +{ + /* Set output registers as dirty */ + if (nreg->used && nreg->output && nreg->emulated_register > 0) + nreg->dirty = true; + if (nreg->output) + nreg->extended = nreg->extend; + nreg->used = false; +} + +void lightrec_free_reg(struct regcache *cache, u8 jit_reg) +{ + free_reg(lightning_reg_to_lightrec(cache, jit_reg)); +} + +void lightrec_free_regs(struct regcache *cache) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(cache->lightrec_regs); i++) + free_reg(&cache->lightrec_regs[i]); +} + +static void clean_reg(jit_state_t *_jit, + struct native_register *nreg, u8 jit_reg, bool clean) +{ + if (nreg->dirty) { + s16 offset = offsetof(struct lightrec_state, native_reg_cache) + + (nreg->emulated_register << 2); + + jit_stxi_i(offset, LIGHTREC_REG_STATE, jit_reg); + nreg->loaded |= nreg->dirty; + nreg->dirty ^= clean; + } +} + +static void clean_regs(struct regcache *cache, jit_state_t *_jit, bool clean) +{ + unsigned int i; + + for (i = 0; i < NUM_REGS; i++) + clean_reg(_jit, &cache->lightrec_regs[i], JIT_V(i), clean); + for (i = 0; i < NUM_TEMPS; i++) { + clean_reg(_jit, &cache->lightrec_regs[i + NUM_REGS], + JIT_R(i), clean); + } +} + +void lightrec_storeback_regs(struct regcache *cache, jit_state_t *_jit) +{ + clean_regs(cache, _jit, false); +} + +void lightrec_clean_regs(struct regcache *cache, jit_state_t *_jit) +{ + clean_regs(cache, _jit, true); +} + +void lightrec_clean_reg(struct regcache *cache, jit_state_t *_jit, u8 jit_reg) +{ + struct native_register *reg = lightning_reg_to_lightrec(cache, jit_reg); + clean_reg(_jit, reg, jit_reg, true); +} + +void lightrec_clean_reg_if_loaded(struct regcache *cache, jit_state_t *_jit, + u8 reg, bool 
unload) +{ + struct native_register *nreg; + u8 jit_reg; + + nreg = find_mapped_reg(cache, reg, false); + if (nreg) { + jit_reg = lightrec_reg_to_lightning(cache, nreg); + + if (unload) + lightrec_unload_nreg(cache, _jit, nreg, jit_reg); + else + clean_reg(_jit, nreg, jit_reg, true); + } +} + +struct native_register * lightrec_regcache_enter_branch(struct regcache *cache) +{ + struct native_register *backup; + + backup = lightrec_malloc(cache->state, MEM_FOR_LIGHTREC, + sizeof(cache->lightrec_regs)); + memcpy(backup, &cache->lightrec_regs, sizeof(cache->lightrec_regs)); + + return backup; +} + +void lightrec_regcache_leave_branch(struct regcache *cache, + struct native_register *regs) +{ + memcpy(&cache->lightrec_regs, regs, sizeof(cache->lightrec_regs)); + lightrec_free(cache->state, MEM_FOR_LIGHTREC, + sizeof(cache->lightrec_regs), regs); +} + +void lightrec_regcache_reset(struct regcache *cache) +{ + memset(&cache->lightrec_regs, 0, sizeof(cache->lightrec_regs)); +} + +struct regcache * lightrec_regcache_init(struct lightrec_state *state) +{ + struct regcache *cache; + + cache = lightrec_calloc(state, MEM_FOR_LIGHTREC, sizeof(*cache)); + if (!cache) + return NULL; + + cache->state = state; + + return cache; +} + +void lightrec_free_regcache(struct regcache *cache) +{ + return lightrec_free(cache->state, MEM_FOR_LIGHTREC, + sizeof(*cache), cache); +} + +void lightrec_regcache_mark_live(struct regcache *cache, jit_state_t *_jit) +{ + struct native_register *nreg; + unsigned int i; + +#ifdef _WIN32 + /* FIXME: GNU Lightning on Windows seems to use our mapped registers as + * temporaries. Until the actual bug is found and fixed, unconditionally + * mark our registers as live here. 
*/ + for (i = 0; i < NUM_REGS; i++) { + nreg = &cache->lightrec_regs[i]; + + if (nreg->used || nreg->loaded || nreg->dirty) + jit_live(JIT_V(i)); + } +#endif + + for (i = 0; i < NUM_TEMPS; i++) { + nreg = &cache->lightrec_regs[NUM_REGS + i]; + + if (nreg->used || nreg->loaded || nreg->dirty) + jit_live(JIT_R(i)); + } +} diff --git a/deps/lightrec/regcache.h b/deps/lightrec/regcache.h new file mode 100644 index 0000000..956cc3c --- /dev/null +++ b/deps/lightrec/regcache.h @@ -0,0 +1,74 @@ +/* + * Copyright (C) 2014 Paul Cercueil <paul@crapouillou.net> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ */ + +#ifndef __REGCACHE_H__ +#define __REGCACHE_H__ + +#include "lightrec-private.h" + +#define NUM_REGS (JIT_V_NUM - 2) +#define NUM_TEMPS (JIT_R_NUM) +#define LIGHTREC_REG_STATE (JIT_V(JIT_V_NUM - 1)) +#define LIGHTREC_REG_CYCLE (JIT_V(JIT_V_NUM - 2)) + +#define REG_LO 32 +#define REG_HI 33 + +struct register_value { + _Bool known; + u32 value; +}; + +struct native_register; +struct regcache; + +u8 lightrec_alloc_reg(struct regcache *cache, jit_state_t *_jit, u8 jit_reg); +u8 lightrec_alloc_reg_temp(struct regcache *cache, jit_state_t *_jit); +u8 lightrec_alloc_reg_out(struct regcache *cache, jit_state_t *_jit, u8 reg); +u8 lightrec_alloc_reg_in(struct regcache *cache, jit_state_t *_jit, u8 reg); +u8 lightrec_alloc_reg_out_ext(struct regcache *cache, + jit_state_t *_jit, u8 reg); +u8 lightrec_alloc_reg_in_ext(struct regcache *cache, jit_state_t *_jit, u8 reg); + +u8 lightrec_request_reg_in(struct regcache *cache, jit_state_t *_jit, + u8 reg, u8 jit_reg); + +void lightrec_regcache_reset(struct regcache *cache); + +void lightrec_lock_reg(struct regcache *cache, jit_state_t *_jit, u8 jit_reg); +void lightrec_free_reg(struct regcache *cache, u8 jit_reg); +void lightrec_free_regs(struct regcache *cache); +void lightrec_clean_reg(struct regcache *cache, jit_state_t *_jit, u8 jit_reg); +void lightrec_clean_regs(struct regcache *cache, jit_state_t *_jit); +void lightrec_unload_reg(struct regcache *cache, jit_state_t *_jit, u8 jit_reg); +void lightrec_storeback_regs(struct regcache *cache, jit_state_t *_jit); + +void lightrec_clean_reg_if_loaded(struct regcache *cache, jit_state_t *_jit, + u8 reg, _Bool unload); + +u8 lightrec_alloc_reg_in_address(struct regcache *cache, + jit_state_t *_jit, u8 reg, s16 offset); + +struct native_register * lightrec_regcache_enter_branch(struct regcache *cache); +void lightrec_regcache_leave_branch(struct regcache *cache, + struct native_register *regs); + +struct regcache * lightrec_regcache_init(struct lightrec_state *state); +void 
lightrec_free_regcache(struct regcache *cache); + +const char * lightrec_reg_name(u8 reg); + +void lightrec_regcache_mark_live(struct regcache *cache, jit_state_t *_jit); + +#endif /* __REGCACHE_H__ */ |