/**
 * Runs the automated AWX installation for the cluster referenced by {@code task}.
 *
 * <p>Each step is one HTTP call against the AWX/Tower REST API ({@code /api/v2/...}) at the
 * URL stored in the {@code Manage} row with id 1:
 * <ol>
 *   <li>create an inventory named {@code aidx-inventory-<clusterId>};</li>
 *   <li>set the inventory variable {@code ansible_port};</li>
 *   <li>register a host in that inventory;</li>
 *   <li>create a job template named {@code aidx-job-<clusterId>};</li>
 *   <li>attach the configured credential to the job template;</li>
 *   <li>launch the job template, passing the cluster settings as {@code extra_vars}.</li>
 * </ol>
 *
 * <p>When the launch succeeds, the task is updated with the created inventory, host,
 * job-template and job ids and the status {@code PENDING}. When any step fails, the task
 * status is set to {@code ERROR} with a step-specific message and an
 * {@link AICentroBaseException} is thrown. The transaction is declared with
 * {@code noRollbackFor = AICentroBaseException.class}, presumably so that the ERROR status
 * written to the task survives the exception (confirm against the transaction setup).
 *
 * @param task the installation task; its cluster id selects the cluster, nodes and storage
 * @throws AICentroBaseException if any AWX step fails; the underlying exception is attached
 *         as a suppressed exception
 * @throws java.util.NoSuchElementException if the {@code Manage} row with id 1 does not exist
 */
@org.springframework.transaction.annotation.Transactional(noRollbackFor = AICentroBaseException.class)
public void awxInstall(Task task) {
    String towerToken = ansibleTowerToken();
    RestTemplate restTemplate = httpRequestFactory.getRestTemplate();
    JsonParser parser = new JsonParser();

    // Default settings live in the Manage row with id 1.
    Manage manageInfo = manageRepository.findById(1L)
            .orElseThrow(() -> new java.util.NoSuchElementException("Manage configuration (id=1) not found"));
    String awxBaseUrl = manageInfo.getUrl();

    HttpHeaders header = new HttpHeaders();
    header.setContentType(MediaType.APPLICATION_JSON_UTF8);
    header.setBearerAuth(towerToken);

    Cluster clusterInfo = findOneByIdClusterInfo(task.getClusterId());

    // Step 1: create the inventory.
    JSONObject inventoryRequestBody = new JSONObject();
    inventoryRequestBody.put("name", "aidx-inventory-" + clusterInfo.getClusterId());
    inventoryRequestBody.put("organization", manageInfo.getOrganization());
    long inventoryId;
    try {
        inventoryId = awxPostForLong(restTemplate, parser, header,
                awxBaseUrl + "/api/v2/inventories/", inventoryRequestBody.toString(), "id");
    } catch (Exception e) {
        throw awxStepFailed(task, "FAIL_TO_CREATE_INVENTORY: Inventory 생성 시 에러가 발생했습니다.",
                new AICentroBaseException("Inventory Create Error", ReturnCode.FAIL_TO_CREATE_INVENTORY), e);
    }

    // Step 2: set the inventory variable ansible_port.
    JSONObject inventoryVariableRequestBody = new JSONObject();
    inventoryVariableRequestBody.put("ansible_port", manageInfo.getPortForwardPort());
    try {
        restTemplate.exchange(awxBaseUrl + "/api/v2/inventories/" + inventoryId + "/variable_data/",
                HttpMethod.PUT, new HttpEntity<>(inventoryVariableRequestBody.toString(), header), String.class);
    } catch (Exception e) {
        throw awxStepFailed(task, "FAIL_TO_SET_INVENTORY_VARIABLE: Inventory 변수 설정 시 에러가 발생했습니다.",
                new AICentroBaseException("Inventory set variable Error", ReturnCode.FAIL_TO_SET_INVENTORY_VARIABLE), e);
    }

    // Step 3: register the host in the inventory.
    String hostName = nodeRepository.findOneByClusterIdAndTypeOrderById(task.getClusterId());
    JSONObject hostRequestBody = new JSONObject();
    hostRequestBody.put("name", hostName);
    long hostId;
    try {
        hostId = awxPostForLong(restTemplate, parser, header,
                awxBaseUrl + "/api/v2/inventories/" + inventoryId + "/hosts/", hostRequestBody.toString(), "id");
    } catch (Exception e) {
        throw awxStepFailed(task, "FAIL_TO_CREATE_HOST: Host 생성 시 에러가 발생했습니다.",
                new AICentroBaseException("Host Create Error", ReturnCode.FAIL_TO_CREATE_HOST), e);
    }

    // Step 4: create the job template.
    // (original note on the name: aidx-job-{{clusterInfo.getUuid()}})
    JSONObject jobTempCreateRequestBody = new JSONObject();
    jobTempCreateRequestBody.put("name", "aidx-job-" + clusterInfo.getClusterId());
    jobTempCreateRequestBody.put("inventory", inventoryId);
    jobTempCreateRequestBody.put("organization", manageInfo.getOrganization());
    jobTempCreateRequestBody.put("verbosity", manageInfo.getVerbosity());
    jobTempCreateRequestBody.put("playbook", manageInfo.getPlaybook());
    jobTempCreateRequestBody.put("project", manageInfo.getProjectId());
    jobTempCreateRequestBody.put("ask_variables_on_launch", "true");
    long jobTempId;
    try {
        jobTempId = awxPostForLong(restTemplate, parser, header,
                awxBaseUrl + "/api/v2/job_templates/", jobTempCreateRequestBody.toString(), "id");
    } catch (Exception e) {
        throw awxStepFailed(task, "FAIL_TO_CREATE_JOB_TEMPLATE: Job Template 생성 시 에러가 발생했습니다.",
                new AICentroBaseException("FAIL_TO_CREATE_JOB_TEMPLATE", ReturnCode.FAIL_TO_CREATE_JOB_TEMPLATE), e);
    }

    // Step 5: attach the credential to the job template.
    JSONObject jobTemplateCredentialRequestBody = new JSONObject();
    jobTemplateCredentialRequestBody.put("id", manageInfo.getCredentialKey());
    try {
        restTemplate.exchange(awxBaseUrl + "/api/v2/job_templates/" + jobTempId + "/credentials/",
                HttpMethod.POST, new HttpEntity<>(jobTemplateCredentialRequestBody.toString(), header), String.class);
    } catch (Exception e) {
        throw awxStepFailed(task, "FAIL_TO_SET_CREDENTIAL_KEY: CredentialKey 주입 시 에러가 발생했습니다.",
                new AICentroBaseException("FAIL_TO_SET_CREDENTIAL_KEY", ReturnCode.FAIL_TO_SET_CREDENTIAL_KEY), e);
    }

    // Step 6: launch the job template with the cluster settings as extra_vars.
    Storage storageInfo = this.findOneByClusterIdStorageInfo(task.getClusterId());
    Admin adminInfo = findOneByIdAdminInfo(clusterInfo.getAdminId());
    String firstNodeHostName = nodeRepository.findFirstByTypeOrderByHostNameAsc(clusterInfo.getId(), "node");
    List<String> coreWorkerList = nodeRepository.findByTypeNodeInfo(task.getClusterId(), firstNodeHostName, true, "node", false);
    List<String> learningWorkerList = nodeRepository.findByTypeNodeInfo(task.getClusterId(), firstNodeHostName, false, "node", true);
    List<String> nodeListConvert = nodeRepository.findByTypeNodeInfo(task.getClusterId(), null, true, "node", true);
    List<String> masterNodeListConvert = nodeRepository.findByTypeNodeInfo(task.getClusterId(), firstNodeHostName, false, "master", false);
    List<String> gpuNodeListConvert = nodeRepository.findByClusterIdAndIsGpu(task.getClusterId());

    JSONObject jobTempLaunchRequestBody = new JSONObject();
    jobTempLaunchRequestBody.put("nas_path", storageInfo.getIpAddress() + ":" + storageInfo.getPath());
    jobTempLaunchRequestBody.put("volumes_nfs_address", storageInfo.getIpAddress());
    jobTempLaunchRequestBody.put("volumes_nfs_path", storageInfo.getPath());
    jobTempLaunchRequestBody.put("core_worker", coreWorkerList.toArray(new String[0]));
    jobTempLaunchRequestBody.put("learning_worker", learningWorkerList.toArray(new String[0]));
    jobTempLaunchRequestBody.put("master_node_list", masterNodeListConvert.toArray(new String[0]));
    jobTempLaunchRequestBody.put("api_host", clusterInfo.getClientDomain());
    jobTempLaunchRequestBody.put("auth_host", clusterInfo.getClientDomain());
    jobTempLaunchRequestBody.put("ui_route_ip", clusterInfo.getClientDomain());
    jobTempLaunchRequestBody.put("app_config_okd_ip", clusterInfo.getClusterDomain());
    jobTempLaunchRequestBody.put("app_config_node_list", nodeListConvert.toArray(new String[0]));
    jobTempLaunchRequestBody.put("app_config_prometheus_url", manageInfo.getPrometheusUrl());
    jobTempLaunchRequestBody.put("app_config_login_username", adminInfo.getUserName());
    jobTempLaunchRequestBody.put("app_config_route_ip", clusterInfo.getClientDomain());
    jobTempLaunchRequestBody.put("app_config_login_password", awxConfig.getAwxPassword());
    jobTempLaunchRequestBody.put("aicentro_okd_userpass", awxConfig.getOkdPassword());
    jobTempLaunchRequestBody.put("aicentro_okd_username", manageInfo.getOkdUsername());
    jobTempLaunchRequestBody.put("aicentro_username", adminInfo.getUserName());
    jobTempLaunchRequestBody.put("aicentro_subdomain", manageInfo.getSubdomain());
    jobTempLaunchRequestBody.put("aicentro_service_subdomain", manageInfo.getServiceSubdomain());
    jobTempLaunchRequestBody.put("working_dir", manageInfo.getWorkingDir());
    jobTempLaunchRequestBody.put("nas_mount_dir", manageInfo.getNasMountDir());
    jobTempLaunchRequestBody.put("cluster_name", clusterInfo.getClusterName());
    jobTempLaunchRequestBody.put("docker_pull_registry", manageInfo.getDockerPullRegistry());
    jobTempLaunchRequestBody.put("docker_push_registry", manageInfo.getDockerPushRegistry());
    // gpu_node_list is only sent when the cluster has at least one GPU node.
    if (!gpuNodeListConvert.isEmpty()) {
        jobTempLaunchRequestBody.put("gpu_node_list", gpuNodeListConvert.toArray(new String[0]));
    }
    JSONObject jobTempLaunchRequestBodyExtraVar = new JSONObject();
    jobTempLaunchRequestBodyExtraVar.put("extra_vars", jobTempLaunchRequestBody);

    try {
        long jobId = awxPostForLong(restTemplate, parser, header,
                awxBaseUrl + "/api/v2/job_templates/" + jobTempId + "/launch/",
                jobTempLaunchRequestBodyExtraVar.toString(), "job");
        // Launch accepted: record the created AWX ids on the task and mark it PENDING.
        // Kept inside the try so a failure while updating the task is reported as a launch failure,
        // exactly as before.
        TaskDto.Update updateTaskInfo = TaskDto.Update.builder()
                .clusterId(task.getClusterId())
                .hostId(hostId)
                .inventoryId(inventoryId)
                .jobId(jobId)
                .jobTempId(jobTempId)
                .status(PENDING)
                .build();
        task.updateTaskInfo(updateTaskInfo);
    } catch (Exception e) {
        throw awxStepFailed(task, "FAIL_TO_LAUNCH_JOB_TEMPLATE: Job Template Launch 시 에러가 발생했습니다.",
                new AICentroBaseException("FAIL_TO_LAUNCH_JOB_TEMPLATE", ReturnCode.FAIL_TO_LAUNCH_JOB_TEMPLATE), e);
    }
}

/**
 * POSTs {@code body} to {@code url} and returns the numeric member {@code field} of the JSON
 * object in the response body.
 *
 * <p>The value is read with {@code getAsLong()} because callers store it in a {@code long};
 * reading it as an {@code int} would silently truncate large ids.
 */
private long awxPostForLong(RestTemplate restTemplate, JsonParser parser, HttpHeaders header,
        String url, String body, String field) {
    ResponseEntity<String> response =
            restTemplate.exchange(url, HttpMethod.POST, new HttpEntity<>(body, header), String.class);
    return parser.parse(response.getBody()).getAsJsonObject().get(field).getAsLong();
}

/**
 * Marks {@code task} as ERROR with {@code statusMessage} and returns {@code failure} for the
 * caller to throw.
 *
 * <p>The underlying exception is attached as a suppressed exception so it is not lost; only
 * the (message, code) constructor of {@link AICentroBaseException} is visible here, so a
 * cause-taking constructor should be preferred if one exists.
 */
private AICentroBaseException awxStepFailed(Task task, String statusMessage,
        AICentroBaseException failure, Exception underlying) {
    task.updateTaskStatus(ERROR, statusMessage);
    failure.addSuppressed(underlying);
    return failure;
}