From 3aa38ef8a9511c4a71a69955779c393cd6c8f549 Mon Sep 17 00:00:00 2001 From: Matt Liberty Date: Mon, 21 Sep 2020 15:35:21 -0700 Subject: [PATCH] add VTune Task API support for better profiling --- CMakeLists.txt | 23 +++++++++++- cmake/FindVTune.cmake | 40 ++++++++++++++++++++ src/dr/FlexDR.cpp | 63 ++++++++++++++++++------------- src/dr/FlexDR_conn.cpp | 2 + src/dr/FlexDR_maze.cpp | 3 +- src/frProfileTask.h | 74 +++++++++++++++++++++++++++++++++++++ src/gc/FlexGC_init.cpp | 2 + src/gc/FlexGC_main.cpp | 2 + src/io/io.cpp | 6 +++ src/io/io_parser_helper.cpp | 2 + src/pa/FlexPA.cpp | 5 +++ src/pa/FlexPA_prep.cpp | 4 ++ src/rp/FlexRP.cpp | 2 + src/rp/FlexRP_init.cpp | 4 +- src/rp/FlexRP_prep.cpp | 2 + src/ta/FlexTA.cpp | 7 ++++ 16 files changed, 213 insertions(+), 28 deletions(-) create mode 100644 cmake/FindVTune.cmake create mode 100644 src/frProfileTask.h diff --git a/CMakeLists.txt b/CMakeLists.txt index f8a3b80..fe18798 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,6 +8,8 @@ set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) +set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${PROJECT_SOURCE_DIR}/cmake) + include(CheckIPOSupported) check_ipo_supported(RESULT ipo_supported OUTPUT error) @@ -15,7 +17,7 @@ if (CMAKE_COMPILER_IS_GNUCC AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7) message(FATAL_ERROR "TritonRoute requires GCC 7 and above! \nuse -DCMAKE_CXX_COMPILER to assign your compiler path") endif() -# Default to bulding optimnized/release executable. +# Default to building optimized/release executable. 
if(NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE RELEASE) endif() @@ -43,6 +45,7 @@ find_package(BISON REQUIRED) find_package(Boost 1.68.0 COMPONENTS unit_test_framework) find_package(OpenMP REQUIRED) find_package(Threads REQUIRED) +find_package(VTune) set (FLEXROUTE_SRC ${FLEXROUTE_HOME}/src/dr/FlexDR_conn.cpp @@ -179,6 +182,7 @@ target_include_directories( flexroutelib ) target_link_libraries( flexroutelib + PUBLIC def lef @@ -255,3 +259,20 @@ if (Boost_unit_test_framework_FOUND) endif() add_test(NAME trTest COMMAND trTest) + +############################################################ +# VTune ITT API +############################################################ + +if (VTune_FOUND) + target_compile_definitions( flexroutelib + PUBLIC + HAS_VTUNE=1 + ) + + target_link_libraries( flexroutelib + PUBLIC + VTune::VTune + ) + +endif(VTune_FOUND) diff --git a/cmake/FindVTune.cmake b/cmake/FindVTune.cmake new file mode 100644 index 0000000..ca814a4 --- /dev/null +++ b/cmake/FindVTune.cmake @@ -0,0 +1,40 @@ +include(FindPackageHandleStandardArgs) + +if( CMAKE_VTUNE_HOME ) + set( VTUNE_HOME ${CMAKE_VTUNE_HOME} ) +elseif( DEFINED ENV{CMAKE_VTUNE_HOME} ) + set( VTUNE_HOME $ENV{CMAKE_VTUNE_HOME} ) +else() + set( VTUNE_HOME /home/tool/intel/vtune_amplifier) +endif() + + +find_path(VTune_INCLUDE_DIRS ittnotify.h + PATHS ${VTUNE_HOME} + PATH_SUFFIXES include) + +find_library(VTune_LIBRARIES ittnotify + HINTS "${VTune_INCLUDE_DIRS}/.." 
+ PATHS ${VTUNE_HOME} + PATH_SUFFIXES lib64) + +find_package_handle_standard_args( + VTune DEFAULT_MSG VTune_LIBRARIES VTune_INCLUDE_DIRS) + +if( VTune_FOUND AND NOT TARGET VTune::VTune ) + add_library(VTune::VTune UNKNOWN IMPORTED) + set_target_properties(VTune::VTune PROPERTIES + IMPORTED_LOCATION "${VTune_LIBRARIES}" + INTERFACE_INCLUDE_DIRECTORIES "${VTune_INCLUDE_DIRS}" + ) + + mark_as_advanced( + VTune_INCLUDE_DIRS # cache entry created by find_path + VTune_LIBRARIES + ) + + target_link_libraries( VTune::VTune + INTERFACE + ${CMAKE_DL_LIBS} + ) +endif() diff --git a/src/dr/FlexDR.cpp b/src/dr/FlexDR.cpp index cb91b4a..eab48da 100755 --- a/src/dr/FlexDR.cpp +++ b/src/dr/FlexDR.cpp @@ -29,7 +29,7 @@ #include #include #include -//#include +#include "frProfileTask.h" #include "dr/FlexDR.h" #include "db/infra/frTime.h" #include @@ -74,6 +74,7 @@ int FlexDRWorker::main() { } int FlexDRWorker::main_mt() { + ProfileTask profile("DR:main_mt"); using namespace std::chrono; high_resolution_clock::time_point t0 = high_resolution_clock::now(); if (VERBOSE > 1) { @@ -1231,6 +1232,7 @@ void FlexDR::init_via2turnMinLen() { void FlexDR::init() { + ProfileTask profile("DR:init"); frTime t; if (VERBOSE > 0) { cout < END_ITERATION) { return; } @@ -1618,38 +1623,45 @@ void FlexDR::searchRepair(int iter, int size, int offset, int mazeEndIter, // parallel execution for (auto &workerBatch: workers) { + ProfileTask profile("DR:checkerboard"); for (auto &workersInBatch: workerBatch) { - // multi thread - #pragma omp parallel for schedule(dynamic) - for (int i = 0; i < (int)workersInBatch.size(); i++) { - workersInBatch[i]->main_mt(); - #pragma omp critical - { - cnt++; - if (VERBOSE > 0) { - if (cnt * 1.0 / tot >= prev_perc / 100.0 + 0.1 && prev_perc < 90) { - if (prev_perc == 0 && t.isExceed(0)) { - isExceed = true; - } - prev_perc += 10; - //if (true) { - if (isExceed) { - if (enableDRC) { - cout <<" completing " <getTopBlock()->getNumMarkers() <<" violations" <main_mt(); + #pragma omp critical + { + cnt++; + if 
(VERBOSE > 0) { + if (cnt * 1.0 / tot >= prev_perc / 100.0 + 0.1 && prev_perc < 90) { + if (prev_perc == 0 && t.isExceed(0)) { + isExceed = true; + } + prev_perc += 10; + //if (true) { + if (isExceed) { + if (enableDRC) { + cout <<" completing " <getTopBlock()->getNumMarkers() <<" violations" <end(); + { + ProfileTask profile("DR:end_batch"); + // single thread + for (int i = 0; i < (int)workersInBatch.size(); i++) { + workersInBatch[i]->end(); + } + workersInBatch.clear(); } - workersInBatch.clear(); } } } @@ -1898,6 +1910,7 @@ void FlexDR::reportDRC() { int FlexDR::main() { + ProfileTask profile("DR:main"); init(); frTime t; if (VERBOSE > 0) { diff --git a/src/dr/FlexDR_conn.cpp b/src/dr/FlexDR_conn.cpp index f7ff1c3..2987925 100755 --- a/src/dr/FlexDR_conn.cpp +++ b/src/dr/FlexDR_conn.cpp @@ -26,6 +26,7 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#include "frProfileTask.h" #include "dr/FlexDR.h" #include "io/io.h" #include @@ -1018,6 +1019,7 @@ void FlexDR::checkConnectivity_addMarker(frNet* net, frLayerNum lNum, const frBo // feedthrough and loop check void FlexDR::checkConnectivity(int iter) { + ProfileTask profile("DR:checkConnectivity"); bool isWrong = false; int batchSize = 131072; diff --git a/src/dr/FlexDR_maze.cpp b/src/dr/FlexDR_maze.cpp index 6dcda84..46b0f09 100755 --- a/src/dr/FlexDR_maze.cpp +++ b/src/dr/FlexDR_maze.cpp @@ -26,8 +26,8 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ +#include "frProfileTask.h" #include "dr/FlexDR.h" -//#include "drc/frDRC.h" #include "gc/FlexGC.h" #include #include @@ -2939,6 +2939,7 @@ void FlexDRWorker::routeNet_prepAreaMap(drNet* net, map &a } bool FlexDRWorker::routeNet(drNet* net) { + ProfileTask profile("DR:routeNet"); //bool enableOutput = true; bool enableOutput = false; if (net->getPins().size() <= 1) { diff --git a/src/frProfileTask.h b/src/frProfileTask.h new file mode 100644 index 0000000..fb06b8b --- /dev/null +++ b/src/frProfileTask.h @@ -0,0 +1,74 @@ +/* Authors: Matt Liberty */ +/* + * Copyright (c) 2020, The Regents of the University of California + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the University nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _FR_PROFILE_TASK_H_ +#define _FR_PROFILE_TASK_H_ + + +#ifdef HAS_VTUNE +#include <ittnotify.h> +#endif + +namespace fr { + +#ifdef HAS_VTUNE +// Scope guard (RAII): begins a VTune ITT task called `name` when constructed +// and ends it when destroyed, so VTune can show runtime per domain-specific +// phase. Not copyable: a copy would end the same task a second time. +class ProfileTask +{ +public: + explicit ProfileTask(const char* name) { + domain_ = __itt_domain_create("TritonRoute"); + name_ = __itt_string_handle_create(name); + __itt_task_begin(domain_, __itt_null, __itt_null, name_); + } + ProfileTask(const ProfileTask&) = delete; + ProfileTask& operator=(const ProfileTask&) = delete; + ~ProfileTask() { + __itt_task_end(domain_); + } +private: + __itt_domain* domain_; + __itt_string_handle* name_; +}; + +#else +// No-op version used when HAS_VTUNE is not defined; same interface. +class ProfileTask +{ +public: + explicit ProfileTask(const char* /*name*/) {} + ProfileTask(const ProfileTask&) = delete; + ProfileTask& operator=(const ProfileTask&) = delete; +}; +#endif + +} + +#endif diff --git a/src/gc/FlexGC_init.cpp b/src/gc/FlexGC_init.cpp index 28210eb..eac0ab3 100755 --- a/src/gc/FlexGC_init.cpp +++ b/src/gc/FlexGC_init.cpp @@ -27,6 +27,7 @@ */ #include +#include "frProfileTask.h" #include "gc/FlexGC.h" #include "db/drObj/drNet.h" #include "dr/FlexDR.h" @@ -749,6 +750,7 @@ void FlexGCWorker::initRegionQuery() { // init initializes all nets from frDesign if no drWorker is provided void FlexGCWorker::init() { + ProfileTask profile("GC:init"); //bool enableOutput = true; bool enableOutput = false; addNet(design->getTopBlock()->getFakeVSSNet()); //[0] floating VSS diff --git a/src/gc/FlexGC_main.cpp b/src/gc/FlexGC_main.cpp index a428e71..8d5fefe 100755 --- a/src/gc/FlexGC_main.cpp +++ 
b/src/gc/FlexGC_main.cpp @@ -27,6 +27,7 @@ */ #include +#include "frProfileTask.h" #include "gc/FlexGC.h" using namespace std; @@ -3697,6 +3698,7 @@ void FlexGCWorker::patchMetalShape_helper() { int FlexGCWorker::main() { + ProfileTask profile("GC:main"); //printMarker = true; // minStep patching for GF14 if (surgicalFixEnabled && getDRWorker() && DBPROCESSNODE == "GF14_13M_3Mx_2Cx_4Kx_2Hx_2Gx_LB") { diff --git a/src/io/io.cpp b/src/io/io.cpp index a840440..15dfcd8 100755 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -31,6 +31,7 @@ #include #include +#include "frProfileTask.h" #include "global.h" #include "io/io.h" #include "db/tech/frConstraint.h" @@ -1069,6 +1070,7 @@ int io::Parser::Callbacks::getDefUnits(defrCallbackType_e type, double number, d } void io::Parser::readDef() { + ProfileTask profile("IO:readDef"); FILE* f; int res; @@ -4805,6 +4807,7 @@ int io::Parser::Callbacks::getLefViaRules(lefrCallbackType_e type, lefiViaRule* } void io::Parser::readLef() { + ProfileTask profile("IO:readLef"); FILE* f; int res; @@ -4908,6 +4911,7 @@ void io::Parser::readLefDef() { } void io::Parser::readGuide() { + ProfileTask profile("IO:readGuide"); if (VERBOSE > 0) { cout < 0) { cout <<"Waring: no output def specified, skipped writing track assignment def" < 0) { cout <<"Waring: no output def specified, skipped writing routed def" < #include @@ -667,6 +668,7 @@ void io::Parser::postProcess() { } void io::Parser::postProcessGuide() { + ProfileTask profile("IO:postProcessGuide"); if (VERBOSE > 0) { cout < #include #include +#include "frProfileTask.h" #include "FlexPA.h" #include "db/infra/frTime.h" #include "gc/FlexGC.h" @@ -37,6 +38,7 @@ using namespace std; using namespace fr; void FlexPA::init() { + ProfileTask profile("PA:init"); initViaRawPriority(); initTrackCoords(); @@ -45,6 +47,7 @@ void FlexPA::init() { } void FlexPA::prep() { + ProfileTask profile("PA:prep"); using namespace std::chrono; high_resolution_clock::time_point t0 = high_resolution_clock::now(); 
prepPoint(); @@ -61,6 +64,8 @@ void FlexPA::prep() { } int FlexPA::main() { + ProfileTask profile("PA:main"); + //bool enableOutput = true; frTime t; if (VERBOSE > 0) { diff --git a/src/pa/FlexPA_prep.cpp b/src/pa/FlexPA_prep.cpp index 6778638..e84c15d 100755 --- a/src/pa/FlexPA_prep.cpp +++ b/src/pa/FlexPA_prep.cpp @@ -29,6 +29,7 @@ #include #include #include +#include "frProfileTask.h" #include "FlexPA.h" #include "db/infra/frTime.h" #include "gc/FlexGC.h" @@ -1116,6 +1117,7 @@ void FlexPA::prepPoint_pin(frPin* pin, frInstTerm* instTerm) { } void FlexPA::prepPoint() { + ProfileTask profile("PA:point"); // bool enableOutput = true; bool enableOutput = false; if (enableOutput) { @@ -1138,6 +1140,7 @@ void FlexPA::prepPoint() { inst->getRefBlock()->getMacroClass() != MacroClassEnum::RING) { continue; } + ProfileTask profile("PA:uniqueInstance"); for (auto &instTerm: inst->getInstTerms()) { // only do for normal and clock terms if (isSkipInstTerm(instTerm.get())) { @@ -1198,6 +1201,7 @@ void FlexPA::prepPoint() { } void FlexPA::prepPattern() { + ProfileTask profile("PA:pattern"); //bool enableOutput = true; bool enableOutput = false; if (enableOutput) { diff --git a/src/rp/FlexRP.cpp b/src/rp/FlexRP.cpp index d0a21ee..238ebd5 100644 --- a/src/rp/FlexRP.cpp +++ b/src/rp/FlexRP.cpp @@ -28,6 +28,7 @@ #include #include +#include "frProfileTask.h" #include "FlexRP.h" #include "db/infra/frTime.h" #include "gc/FlexGC.h" @@ -36,6 +37,7 @@ using namespace std; using namespace fr; void FlexRP::main() { + ProfileTask profile("RP:main"); init(); prep(); } diff --git a/src/rp/FlexRP_init.cpp b/src/rp/FlexRP_init.cpp index 812ef21..4790db0 100644 --- a/src/rp/FlexRP_init.cpp +++ b/src/rp/FlexRP_init.cpp @@ -28,6 +28,7 @@ #include #include +#include "frProfileTask.h" #include "FlexRP.h" #include "db/infra/frTime.h" #include "gc/FlexGC.h" @@ -36,6 +37,7 @@ using namespace std; using namespace fr; void FlexRP::init() { + ProfileTask profile("RP:init"); bool enableOutput = false; 
vector > forbiddenRanges; @@ -66,4 +68,4 @@ void FlexRP::init() { cout << "tech->line2LineForbiddenLen size = " << tech->line2LineForbiddenLen.size() << "\n"; cout << "tech->viaForbiddenThrough size = " << tech->viaForbiddenThrough.size() << "\n"; } -} \ No newline at end of file +} diff --git a/src/rp/FlexRP_prep.cpp b/src/rp/FlexRP_prep.cpp index 4b86f98..ce90182 100644 --- a/src/rp/FlexRP_prep.cpp +++ b/src/rp/FlexRP_prep.cpp @@ -28,6 +28,7 @@ #include #include +#include "frProfileTask.h" #include "FlexRP.h" #include "db/infra/frTime.h" #include "gc/FlexGC.h" @@ -36,6 +37,7 @@ using namespace std; using namespace fr; void FlexRP::prep() { + ProfileTask profile("RP:prep"); prep_via2viaForbiddenLen(); prep_viaForbiddenTurnLen(); prep_viaForbiddenPlanarLen(); diff --git a/src/ta/FlexTA.cpp b/src/ta/FlexTA.cpp index c316a65..6275ae9 100755 --- a/src/ta/FlexTA.cpp +++ b/src/ta/FlexTA.cpp @@ -32,6 +32,7 @@ #include "global.h" #include "FlexTA.h" #include "db/infra/frTime.h" +#include "frProfileTask.h" #include #include @@ -82,6 +83,7 @@ int FlexTAWorker::main() { } int FlexTAWorker::main_mt() { + ProfileTask profile("TA:main_mt"); using namespace std::chrono; high_resolution_clock::time_point t0 = high_resolution_clock::now(); if (VERBOSE > 1) { @@ -222,6 +224,7 @@ int FlexTA::initTA_helper(int iter, int size, int offset, bool isH, int &numPane // parallel execution // multi thread for (auto &workerBatch: workers) { + ProfileTask profile("TA:batch"); #pragma omp parallel for schedule(dynamic) for (int i = 0; i < (int)workerBatch.size(); i++) { workerBatch[i]->main_mt(); @@ -241,6 +244,7 @@ int FlexTA::initTA_helper(int iter, int size, int offset, bool isH, int &numPane } void FlexTA::initTA(int size) { + ProfileTask profile("TA:init"); frTime t; if (VERBOSE > 1) { @@ -282,6 +286,7 @@ void FlexTA::initTA(int size) { } void FlexTA::searchRepair(int iter, int size, int offset) { + ProfileTask profile("TA:searchRepair"); frTime t; if (VERBOSE > 1) { @@ -337,6 +342,8 @@ 
void FlexTA::searchRepair(int iter, int size, int offset) { } int FlexTA::main() { + ProfileTask profile("TA:main"); + frTime t; if (VERBOSE > 0) { cout <