add VTune Task API support for better profiling

This commit is contained in:
Matt Liberty
2020-09-21 15:35:21 -07:00
parent 3aa919fe34
commit 3aa38ef8a9
16 changed files with 213 additions and 28 deletions

View File

@@ -8,6 +8,8 @@ set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${PROJECT_SOURCE_DIR}/cmake)
include(CheckIPOSupported)
check_ipo_supported(RESULT ipo_supported OUTPUT error)
@@ -15,7 +17,7 @@ if (CMAKE_COMPILER_IS_GNUCC AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7)
message(FATAL_ERROR "TritonRoute requires GCC 7 and above! \nuse -DCMAKE_CXX_COMPILER to assign your compiler path")
endif()
# Default to bulding optimnized/release executable.
# Default to building optimized/release executable.
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE RELEASE)
endif()
@@ -43,6 +45,7 @@ find_package(BISON REQUIRED)
find_package(Boost 1.68.0 COMPONENTS unit_test_framework)
find_package(OpenMP REQUIRED)
find_package(Threads REQUIRED)
find_package(VTune)
set (FLEXROUTE_SRC
${FLEXROUTE_HOME}/src/dr/FlexDR_conn.cpp
@@ -179,6 +182,7 @@ target_include_directories( flexroutelib
)
target_link_libraries( flexroutelib
PUBLIC
def
lef
@@ -255,3 +259,20 @@ if (Boost_unit_test_framework_FOUND)
endif()
add_test(NAME trTest COMMAND trTest)
############################################################
# VTune ITT API
############################################################
# Wire the VTune ITT API into flexroutelib only when find_package(VTune)
# located it. Both settings are PUBLIC so that targets linking against
# flexroutelib also see HAS_VTUNE and the VTune::VTune usage requirements.
if (VTune_FOUND)
  target_link_libraries( flexroutelib PUBLIC VTune::VTune )
  target_compile_definitions( flexroutelib PUBLIC HAS_VTUNE=1 )
endif()

40
cmake/FindVTune.cmake Normal file
View File

@@ -0,0 +1,40 @@
# FindVTune
# ---------
# Locates the Intel VTune ITT API (ittnotify.h + the ittnotify library).
#
# Search root, in order of precedence:
#   1. the CMake variable      CMAKE_VTUNE_HOME
#   2. the environment variable CMAKE_VTUNE_HOME
#   3. the built-in default    /home/tool/intel/vtune_amplifier
#
# Result variables:
#   VTune_FOUND         - set by find_package_handle_standard_args
#   VTune_INCLUDE_DIRS  - directory containing ittnotify.h
#   VTune_LIBRARIES     - path to the ittnotify library
#
# Imported target (created only when found):
#   VTune::VTune
include(FindPackageHandleStandardArgs)

if( CMAKE_VTUNE_HOME )
  set( VTUNE_HOME ${CMAKE_VTUNE_HOME} )
elseif( DEFINED ENV{CMAKE_VTUNE_HOME} )
  set( VTUNE_HOME $ENV{CMAKE_VTUNE_HOME} )
else()
  set( VTUNE_HOME /home/tool/intel/vtune_amplifier)
endif()

find_path(VTune_INCLUDE_DIRS ittnotify.h
  PATHS ${VTUNE_HOME}
  PATH_SUFFIXES include)

# The parent of the include directory is tried first as a hint, then
# VTUNE_HOME; in both cases the lib64 suffix is searched as well.
find_library(VTune_LIBRARIES ittnotify
  HINTS "${VTune_INCLUDE_DIRS}/.."
  PATHS ${VTUNE_HOME}
  PATH_SUFFIXES lib64)

find_package_handle_standard_args(
  VTune DEFAULT_MSG VTune_LIBRARIES VTune_INCLUDE_DIRS)

if( VTune_FOUND AND NOT TARGET VTune::VTune )
  add_library(VTune::VTune UNKNOWN IMPORTED)
  set_target_properties(VTune::VTune PROPERTIES
    IMPORTED_LOCATION "${VTune_LIBRARIES}"
    INTERFACE_INCLUDE_DIRECTORIES "${VTune_INCLUDE_DIRS}"
  )

  # Must name the exact cache variables created by find_path/find_library
  # above (VTune_INCLUDE_DIRS, not VTune_INCLUDE_DIR).
  mark_as_advanced(
    VTune_INCLUDE_DIRS
    VTune_LIBRARIES
  )

  # Consumers of VTune::VTune also link the platform's dynamic-loading
  # library (CMAKE_DL_LIBS).
  target_link_libraries( VTune::VTune
    INTERFACE
    ${CMAKE_DL_LIBS}
  )
endif()

View File

@@ -29,7 +29,7 @@
#include <chrono>
#include <fstream>
#include <boost/io/ios_state.hpp>
//#include <taskflow/taskflow.hpp>
#include "frProfileTask.h"
#include "dr/FlexDR.h"
#include "db/infra/frTime.h"
#include <omp.h>
@@ -74,6 +74,7 @@ int FlexDRWorker::main() {
}
int FlexDRWorker::main_mt() {
ProfileTask profile("DR:main_mt");
using namespace std::chrono;
high_resolution_clock::time_point t0 = high_resolution_clock::now();
if (VERBOSE > 1) {
@@ -1231,6 +1232,7 @@ void FlexDR::init_via2turnMinLen() {
void FlexDR::init() {
ProfileTask profile("DR:init");
frTime t;
if (VERBOSE > 0) {
cout <<endl <<"start routing data preparation" <<endl;
@@ -1479,6 +1481,9 @@ void FlexDR::searchRepair(int iter, int size, int offset, int mazeEndIter,
frUInt4 workerMarkerBloatWidth, frUInt4 workerMarkerBloatDepth,
bool enableDRC, int ripupMode, bool followGuide,
int fixMode, bool TEST) {
std::string profile_name("DR:searchRepair");
profile_name += std::to_string(iter);
ProfileTask profile(profile_name.c_str());
if (iter > END_ITERATION) {
return;
}
@@ -1618,38 +1623,45 @@ void FlexDR::searchRepair(int iter, int size, int offset, int mazeEndIter,
// parallel execution
for (auto &workerBatch: workers) {
ProfileTask profile("DR:checkerboard");
for (auto &workersInBatch: workerBatch) {
// multi thread
#pragma omp parallel for schedule(dynamic)
for (int i = 0; i < (int)workersInBatch.size(); i++) {
workersInBatch[i]->main_mt();
#pragma omp critical
{
cnt++;
if (VERBOSE > 0) {
if (cnt * 1.0 / tot >= prev_perc / 100.0 + 0.1 && prev_perc < 90) {
if (prev_perc == 0 && t.isExceed(0)) {
isExceed = true;
}
prev_perc += 10;
//if (true) {
if (isExceed) {
if (enableDRC) {
cout <<" completing " <<prev_perc <<"% with " <<getDesign()->getTopBlock()->getNumMarkers() <<" violations" <<endl;
} else {
cout <<" completing " <<prev_perc <<"% with " <<numQuickMarkers <<" quick violations" <<endl;
{
ProfileTask profile("DR:batch");
// multi thread
#pragma omp parallel for schedule(dynamic)
for (int i = 0; i < (int)workersInBatch.size(); i++) {
workersInBatch[i]->main_mt();
#pragma omp critical
{
cnt++;
if (VERBOSE > 0) {
if (cnt * 1.0 / tot >= prev_perc / 100.0 + 0.1 && prev_perc < 90) {
if (prev_perc == 0 && t.isExceed(0)) {
isExceed = true;
}
prev_perc += 10;
//if (true) {
if (isExceed) {
if (enableDRC) {
cout <<" completing " <<prev_perc <<"% with " <<getDesign()->getTopBlock()->getNumMarkers() <<" violations" <<endl;
} else {
cout <<" completing " <<prev_perc <<"% with " <<numQuickMarkers <<" quick violations" <<endl;
}
cout <<" " <<t <<endl <<flush;
}
cout <<" " <<t <<endl <<flush;
}
}
}
}
}
// single thread
for (int i = 0; i < (int)workersInBatch.size(); i++) {
workersInBatch[i]->end();
{
ProfileTask profile("DR:end_batch");
// single thread
for (int i = 0; i < (int)workersInBatch.size(); i++) {
workersInBatch[i]->end();
}
workersInBatch.clear();
}
workersInBatch.clear();
}
}
}
@@ -1898,6 +1910,7 @@ void FlexDR::reportDRC() {
int FlexDR::main() {
ProfileTask profile("DR:main");
init();
frTime t;
if (VERBOSE > 0) {

View File

@@ -26,6 +26,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "frProfileTask.h"
#include "dr/FlexDR.h"
#include "io/io.h"
#include <omp.h>
@@ -1018,6 +1019,7 @@ void FlexDR::checkConnectivity_addMarker(frNet* net, frLayerNum lNum, const frBo
// feedthrough and loop check
void FlexDR::checkConnectivity(int iter) {
ProfileTask profile("DR:checkConnectivity");
bool isWrong = false;
int batchSize = 131072;

View File

@@ -26,8 +26,8 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "frProfileTask.h"
#include "dr/FlexDR.h"
//#include "drc/frDRC.h"
#include "gc/FlexGC.h"
#include <chrono>
#include <algorithm>
@@ -2939,6 +2939,7 @@ void FlexDRWorker::routeNet_prepAreaMap(drNet* net, map<FlexMazeIdx, frCoord> &a
}
bool FlexDRWorker::routeNet(drNet* net) {
ProfileTask profile("DR:routeNet");
//bool enableOutput = true;
bool enableOutput = false;
if (net->getPins().size() <= 1) {

74
src/frProfileTask.h Normal file
View File

@@ -0,0 +1,74 @@
/* Authors: Matt Liberty */
/*
* Copyright (c) 2020, The Regents of the University of California
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the University nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef _FR_PROFILE_TASK_H_
#define _FR_PROFILE_TASK_H_
#ifdef HAS_VTUNE
#include <ittnotify.h>
#endif
namespace fr {
#ifdef HAS_VTUNE
// Scoped VTune task marker: the constructor begins an ITT task named
// `name` in the "TritonRoute" domain and the destructor ends it, so the
// task covers exactly the lifetime of the object. This is useful in
// VTune to see where the runtime is going with a more domain-specific
// display.
//
// The object is non-copyable: every instance must pair exactly one
// __itt_task_begin with one __itt_task_end, and a copy would end the
// task a second time.
class ProfileTask
{
 public:
  // name: label passed to __itt_string_handle_create for this task.
  explicit ProfileTask(const char* name)
    : domain_(__itt_domain_create("TritonRoute"))
  {
    __itt_task_begin(domain_, __itt_null, __itt_null,
                     __itt_string_handle_create(name));
  }

  ~ProfileTask() {
    __itt_task_end(domain_);
  }

  ProfileTask(const ProfileTask&) = delete;
  ProfileTask& operator=(const ProfileTask&) = delete;

 private:
  // Kept so the destructor ends the task in the domain it was begun in.
  __itt_domain* domain_;
};
#else
// No-op version used when HAS_VTUNE is not defined. It mirrors the
// interface of the VTune-enabled class (explicit, non-copyable) so code
// compiles identically in both configurations.
class ProfileTask
{
 public:
  // The name is intentionally ignored.
  explicit ProfileTask(const char* /* name */) {
  }

  ProfileTask(const ProfileTask&) = delete;
  ProfileTask& operator=(const ProfileTask&) = delete;
};
#endif
}
#endif

View File

@@ -27,6 +27,7 @@
*/
#include <iostream>
#include "frProfileTask.h"
#include "gc/FlexGC.h"
#include "db/drObj/drNet.h"
#include "dr/FlexDR.h"
@@ -749,6 +750,7 @@ void FlexGCWorker::initRegionQuery() {
// init initializes all nets from frDesign if no drWorker is provided
void FlexGCWorker::init() {
ProfileTask profile("GC:init");
//bool enableOutput = true;
bool enableOutput = false;
addNet(design->getTopBlock()->getFakeVSSNet()); //[0] floating VSS

View File

@@ -27,6 +27,7 @@
*/
#include <iostream>
#include "frProfileTask.h"
#include "gc/FlexGC.h"
using namespace std;
@@ -3697,6 +3698,7 @@ void FlexGCWorker::patchMetalShape_helper() {
int FlexGCWorker::main() {
ProfileTask profile("GC:main");
//printMarker = true;
// minStep patching for GF14
if (surgicalFixEnabled && getDRWorker() && DBPROCESSNODE == "GF14_13M_3Mx_2Cx_4Kx_2Hx_2Gx_LB") {

View File

@@ -31,6 +31,7 @@
#include <sstream>
#include <exception>
#include "frProfileTask.h"
#include "global.h"
#include "io/io.h"
#include "db/tech/frConstraint.h"
@@ -1069,6 +1070,7 @@ int io::Parser::Callbacks::getDefUnits(defrCallbackType_e type, double number, d
}
void io::Parser::readDef() {
ProfileTask profile("IO:readDef");
FILE* f;
int res;
@@ -4805,6 +4807,7 @@ int io::Parser::Callbacks::getLefViaRules(lefrCallbackType_e type, lefiViaRule*
}
void io::Parser::readLef() {
ProfileTask profile("IO:readLef");
FILE* f;
int res;
@@ -4908,6 +4911,7 @@ void io::Parser::readLefDef() {
}
void io::Parser::readGuide() {
ProfileTask profile("IO:readGuide");
if (VERBOSE > 0) {
cout <<endl <<"reading guide ..." <<endl;
@@ -5339,6 +5343,7 @@ void io::Writer::fillConnFigs(bool isTA) {
}
void io::Writer::writeFromTA() {
ProfileTask profile("IO:writeFromTA");
if (OUTTA_FILE == string("")) {
if (VERBOSE > 0) {
cout <<"Waring: no output def specified, skipped writing track assignment def" <<endl;
@@ -5354,6 +5359,7 @@ void io::Writer::writeFromTA() {
}
void io::Writer::writeFromDR(const string &str) {
ProfileTask profile("IO:writeFromDR");
if (OUT_FILE == string("")) {
if (VERBOSE > 0) {
cout <<"Waring: no output def specified, skipped writing routed def" <<endl;

View File

@@ -32,6 +32,7 @@
#include "global.h"
#include "io/io.h"
#include "frBaseTypes.h"
#include "frProfileTask.h"
#include <fstream>
#include <sstream>
@@ -667,6 +668,7 @@ void io::Parser::postProcess() {
}
void io::Parser::postProcessGuide() {
ProfileTask profile("IO:postProcessGuide");
if (VERBOSE > 0) {
cout <<endl <<"post process guides ..." <<endl;
}

View File

@@ -29,6 +29,7 @@
#include <iostream>
#include <sstream>
#include <chrono>
#include "frProfileTask.h"
#include "FlexPA.h"
#include "db/infra/frTime.h"
#include "gc/FlexGC.h"
@@ -37,6 +38,7 @@ using namespace std;
using namespace fr;
void FlexPA::init() {
ProfileTask profile("PA:init");
initViaRawPriority();
initTrackCoords();
@@ -45,6 +47,7 @@ void FlexPA::init() {
}
void FlexPA::prep() {
ProfileTask profile("PA:prep");
using namespace std::chrono;
high_resolution_clock::time_point t0 = high_resolution_clock::now();
prepPoint();
@@ -61,6 +64,8 @@ void FlexPA::prep() {
}
int FlexPA::main() {
ProfileTask profile("PA:main");
//bool enableOutput = true;
frTime t;
if (VERBOSE > 0) {

View File

@@ -29,6 +29,7 @@
#include <iostream>
#include <sstream>
#include <chrono>
#include "frProfileTask.h"
#include "FlexPA.h"
#include "db/infra/frTime.h"
#include "gc/FlexGC.h"
@@ -1116,6 +1117,7 @@ void FlexPA::prepPoint_pin(frPin* pin, frInstTerm* instTerm) {
}
void FlexPA::prepPoint() {
ProfileTask profile("PA:point");
// bool enableOutput = true;
bool enableOutput = false;
if (enableOutput) {
@@ -1138,6 +1140,7 @@ void FlexPA::prepPoint() {
inst->getRefBlock()->getMacroClass() != MacroClassEnum::RING) {
continue;
}
ProfileTask profile("PA:uniqueInstance");
for (auto &instTerm: inst->getInstTerms()) {
// only do for normal and clock terms
if (isSkipInstTerm(instTerm.get())) {
@@ -1198,6 +1201,7 @@ void FlexPA::prepPoint() {
}
void FlexPA::prepPattern() {
ProfileTask profile("PA:pattern");
//bool enableOutput = true;
bool enableOutput = false;
if (enableOutput) {

View File

@@ -28,6 +28,7 @@
#include <iostream>
#include <sstream>
#include "frProfileTask.h"
#include "FlexRP.h"
#include "db/infra/frTime.h"
#include "gc/FlexGC.h"
@@ -36,6 +37,7 @@ using namespace std;
using namespace fr;
// Runs init() followed by prep(), with both calls covered by a single
// "RP:main" profiling task.
void FlexRP::main() {
  // Declared first so the task spans init() and prep() and is closed
  // when this function returns.
  ProfileTask rpMainTask("RP:main");
  init();
  prep();
}

View File

@@ -28,6 +28,7 @@
#include <iostream>
#include <sstream>
#include "frProfileTask.h"
#include "FlexRP.h"
#include "db/infra/frTime.h"
#include "gc/FlexGC.h"
@@ -36,6 +37,7 @@ using namespace std;
using namespace fr;
void FlexRP::init() {
ProfileTask profile("RP:init");
bool enableOutput = false;
vector<pair<frCoord, frCoord> > forbiddenRanges;
@@ -66,4 +68,4 @@ void FlexRP::init() {
cout << "tech->line2LineForbiddenLen size = " << tech->line2LineForbiddenLen.size() << "\n";
cout << "tech->viaForbiddenThrough size = " << tech->viaForbiddenThrough.size() << "\n";
}
}
}

View File

@@ -28,6 +28,7 @@
#include <iostream>
#include <sstream>
#include "frProfileTask.h"
#include "FlexRP.h"
#include "db/infra/frTime.h"
#include "gc/FlexGC.h"
@@ -36,6 +37,7 @@ using namespace std;
using namespace fr;
void FlexRP::prep() {
ProfileTask profile("RP:prep");
prep_via2viaForbiddenLen();
prep_viaForbiddenTurnLen();
prep_viaForbiddenPlanarLen();

View File

@@ -32,6 +32,7 @@
#include "global.h"
#include "FlexTA.h"
#include "db/infra/frTime.h"
#include "frProfileTask.h"
#include <algorithm>
#include <omp.h>
@@ -82,6 +83,7 @@ int FlexTAWorker::main() {
}
int FlexTAWorker::main_mt() {
ProfileTask profile("TA:main_mt");
using namespace std::chrono;
high_resolution_clock::time_point t0 = high_resolution_clock::now();
if (VERBOSE > 1) {
@@ -222,6 +224,7 @@ int FlexTA::initTA_helper(int iter, int size, int offset, bool isH, int &numPane
// parallel execution
// multi thread
for (auto &workerBatch: workers) {
ProfileTask profile("TA:batch");
#pragma omp parallel for schedule(dynamic)
for (int i = 0; i < (int)workerBatch.size(); i++) {
workerBatch[i]->main_mt();
@@ -241,6 +244,7 @@ int FlexTA::initTA_helper(int iter, int size, int offset, bool isH, int &numPane
}
void FlexTA::initTA(int size) {
ProfileTask profile("TA:init");
frTime t;
if (VERBOSE > 1) {
@@ -282,6 +286,7 @@ void FlexTA::initTA(int size) {
}
void FlexTA::searchRepair(int iter, int size, int offset) {
ProfileTask profile("TA:searchRepair");
frTime t;
if (VERBOSE > 1) {
@@ -337,6 +342,8 @@ void FlexTA::searchRepair(int iter, int size, int offset) {
}
int FlexTA::main() {
ProfileTask profile("TA:main");
frTime t;
if (VERBOSE > 0) {
cout <<endl <<endl <<"start track assignment" <<endl;