// ==UserScript==
// @name MS AKS Tool Launcher
// @namespace https://microsoft.com
// @description Effort to simplify launching AKS support-related tools
// @require http://ajax.googleapis.com/ajax/libs/jquery/1.6.2/jquery.min.js
// @require http://ajax.googleapis.com/ajax/libs/jqueryui/1.11.1/jquery-ui.min.js
// @require https://gist.github.com/raw/2625891/waitForKeyElements.js
// @grant GM_addStyle
// @grant GM_setClipboard
// @include /^https?://servicedesk\.microsoft\.com/.*$/
// @include /^https?://servicedesk/.*$/
// @version 0.0.1
// ==/UserScript==

/**
 * Matches the commercial case page, over http or https, on either host that
 * the @include rules above allow.
 */
const CASE_URL_REGEX = /^https?:\/\/servicedesk(\.microsoft\.com)?\/#\/customer\/commercial\?caseNumber=\d+$/;

/**
 * Matches an AKS managed-cluster resource URI. Capture groups:
 * 1 = subscription id, 2 = resource group, 3 = cluster name.
 * The name groups stop at the first "/" or whitespace so that text following
 * the URI on the same line is not swallowed.
 */
const RESOURCE_URI_REGEX = /\/subscriptions\/(\w{8}-\w{4}-\w{4}-\w{4}-\w{12})\/resourceGroups\/([^\/\s]+)\/providers\/Microsoft\.ContainerService\/managedClusters\/([\w-]+)/i;

/** Matches the "- Location: <region>" line of the case text. */
const LOCATION_REGEX = /- Location:[ \t]*(\S+)/;

/** Lower-case location name -> value sent as the Jarvis `actionEndpoint`. */
const ACTION_ENDPOINTS = Object.freeze({
  australiaeast: 'AustraliaEast',
  brazilsouth: 'BrazilSouth',
  canadacentral: 'CanadaCentral',
  canadaeast: 'CanadaEast',
  centralindia: 'CentralIndia',
  centralus: 'CentralUS',
  eastasia: 'EastAsia',
  eastus: 'EastUS',
  eastus2: 'EastUs2',
  eastus2euap: 'EastUS2EUAP',
  francecentral: 'FranceCentral',
  japaneast: 'JapanEast',
  japanwest: 'JapanWest',
  northcentralus: 'NorthCentralUS',
  northeurope: 'NorthEurope',
  southafricanorth: 'SouthAfricaNorth',
  southcentralus: 'SouthCentralUS',
  southeastasia: 'SouthEastAsia',
  southindia: 'SouthIndia',
  uksouth: 'UkSouth',
  ukwest: 'UKWest',
  westcentralus: 'WestCentralUS',
  westeurope: 'WestEurope',
  westus: 'WestUs',
  westus2: 'WestUs2',
});

/**
 * Buttons rendered in the launcher, in display order. The ids are the ones the
 * click handlers are attached to; the labels are a reconstruction.
 */
const TOOL_BUTTONS = Object.freeze([
  { id: 'appLensButton', label: 'AppLens' },
  { id: 'jarvisButton', label: 'Jarvis' },
  { id: 'ascButton', label: 'ASC' },
  { id: 'kustoButton', label: 'Kusto' },
  { id: 'oneClickButton', label: 'One Click' },
]);

/** Styles for the launcher container and its buttons. */
const CONTAINER_CSS = `
#myContainer {
  position: absolute;
  bottom: 0;
  left: 0;
  font-size: 14px;
  background: orange;
  border: 3px outset black;
  margin: 3px;
  opacity: 0.7;
  z-index: 222;
  padding: 5px 5px;
}
#myContainer button {
  cursor: pointer;
}
#myContainer p {
  color: red;
  background: white;
}
`;

// GM_addStyle only needs to run once even if the launcher is rebuilt.
let stylesInjected = false;

/**
 * Translates a location name into the Jarvis action endpoint name.
 *
 * @param {string} location - Location as written in the case text.
 * @returns {string} The endpoint name, or "" (after logging) when the location
 *   is not in ACTION_ENDPOINTS.
 */
function resolveActionEndpoint(location) {
  const key = String(location).toLowerCase();
  // hasOwnProperty keeps keys such as "constructor" from resolving to
  // inherited Object members.
  if (Object.prototype.hasOwnProperty.call(ACTION_ENDPOINTS, key)) {
    return ACTION_ENDPOINTS[key];
  }
  console.log(`Sorry, looks like ${location} isn't stored as a valid region. Please reach out to tybean@.`);
  return '';
}

/**
 * Extracts the cluster identity and region from the text of the case section.
 *
 * @param {string} caseText - Text content of the case section.
 * @returns {{resourceURI: string, subscription: string, resourceGroup: string,
 *   cluster: string, region: string}} Parsed details; `region` is "" when the
 *   location is missing or unknown.
 * @throws {Error} When the text contains no AKS managed-cluster resource URI.
 */
function parseCaseText(caseText) {
  const uriMatch = String(caseText).match(RESOURCE_URI_REGEX);
  if (uriMatch === null) {
    throw new Error('No AKS managed cluster resource URI was found in the case details.');
  }
  const locationMatch = String(caseText).match(LOCATION_REGEX);
  return {
    resourceURI: uriMatch[0],
    subscription: uriMatch[1],
    resourceGroup: uriMatch[2],
    cluster: uriMatch[3],
    region: resolveActionEndpoint(locationMatch === null ? '' : locationMatch[1]),
  };
}

/**
 * Builds the URLs opened by the AppLens, Jarvis and ASC buttons.
 *
 * @param {{resourceURI: string, subscription: string, resourceGroup: string,
 *   cluster: string, region: string}} details - Output of parseCaseText.
 * @param {string} caseID - Case identifier appended to the ASC URL as `srId`.
 * @param {Date} [now] - End of the AppLens time window; the window starts one
 *   calendar day earlier.
 * @returns {{appLens: string, jarvis: string, asc: string}} The tool URLs.
 */
function buildToolUrls(details, caseID, now = new Date()) {
  const endDate = now.toISOString();
  const dayBefore = new Date(now.getTime());
  dayBefore.setDate(dayBefore.getDate() - 1);
  const startDate = dayBefore.toISOString();

  const jarvisParams = JSON.stringify({
    wellknownsubscriptionid: details.subscription,
    smeaksresourcegroup: details.resourceGroup,
    smeaksresource: details.cluster,
    smeaksapiversions: '2019-04-01',
  });
  // The query is joined from parts so that the "&" in front of "params" can
  // never again be mistaken for the start of an HTML entity and collapsed
  // into a pilcrow, which silently drops the params argument.
  const jarvisQuery = [
    'page=actions',
    'acisEndpoint=Public',
    'selectedNodeType=3',
    'extension=AzureContainerService%20AKS',
    'group=Resource%20Operations',
    'operationId=GetManagedCluster',
    'operationName=Get%20Managed%20Cluster',
    'inputMode=single',
    `params=${jarvisParams}`,
    `actionEndpoint=${details.region}`,
  ].join('&');

  const ascResourceURI = details.resourceURI.replace(/\//g, '~2F');

  return {
    appLens: `https://applens.azurewebsites.net${details.resourceURI}/home/category?startTime=${startDate}&endTime=${endDate}`,
    jarvis: `https://jarvis-west.dc.ad.msft.net/?${jarvisQuery}`,
    asc: `https://azuresupportcenter.msftcloudes.com/resourceExplorer/resource/${ascResourceURI}?srId=${caseID}`,
  };
}

/**
 * Produces the Kusto query cheat sheet with the case's subscription, resource
 * group and cluster filled in. Each pipeline stage and each comment sits on
 * its own line because Kusto "//" comments run to the end of the line.
 *
 * @param {{subscription: string, resourceGroup: string, cluster: string}} details
 *   Identity of the cluster the queries should target.
 * @returns {string} Text that is copied to the clipboard by the Kusto button.
 */
function buildKustoTemplate(details) {
  const subscription = details.subscription;
  const resourceGroup = details.resourceGroup;
  const cluster = details.cluster;
  return `
// Here’s are ALL the errors/messages for the AKS clusters in the resource group in the past 90 days -
union cluster("Aks").database("AKSprod").FrontEndContextActivity, cluster("Aks").database("AKSprod").AsyncContextActivity
| where subscriptionID contains "${subscription}"
| where resourceName contains "${cluster}"
| where level != "info"
| where PreciseTimeStamp > ago(90d)
| project PreciseTimeStamp, operationID, correlationID, level, suboperationName, msg

// Here’s are the recent scale/upgrade operations –
union cluster("Aks").database("AKSprod").FrontEndContextActivity, cluster("Aks").database("AKSprod").AsyncContextActivity
| where subscriptionID contains "${subscription}"
| where resourceName contains "${cluster}"
| where msg contains "intent" or msg contains "Upgrading" or msg contains "Successfully upgraded cluster" or msg contains "Operation succeeded" or msg contains "validateAndUpdateOrchestratorProfile" // or msg contains "unique pods in running state"
| where PreciseTimeStamp > ago(90d)
| project PreciseTimeStamp, operationID, correlationID, level, suboperationName, msg

// Shows the scale errors/messages for an AKS cluster using the operationID from the previous query
union cluster("Aks").database("AKSprod").FrontEndContextActivity, cluster("Aks").database("AKSprod").AsyncContextActivity
| where operationID == ""
| where level != "info"
| project PreciseTimeStamp, level, msg

//Black box monitoring FIND fqdn by customer"s subscriptionID
cluster("aks").database("AKSprod").BlackboxMonitoringActivity
| where subscriptionID == "${subscription}" and resourceGroupName contains "${resourceGroup}"
| where PreciseTimeStamp > ago(9d)
| summarize by fqdn, resourceGroupName, resourceName, underlayName

//Black box monitoring using fqdn to find where cluster is not healthy
cluster("aks").database("AKSprod").BlackboxMonitoringActivity
| where fqdn == "replacefqdn"
// | where (["state"] != "Healthy" or podsState != "Healthy" or resourceState != "Healthy" or addonPodsState != "Healthy")
| where PreciseTimeStamp > ago(20d)
| project fqdn, PreciseTimeStamp, agentNodeName, state, reason, podsState, resourceState, addonPodsState, agentNodeCount, provisioningState, msg, resourceGroupName, resourceName, underlayName
// | order by PreciseTimeStamp asc
// | render timepivot by fqdn, reason, agentNodeName, addonPodsState
| render timepivot by fqdn, agentNodeName, addonPodsState, reason
// | summarize count() by reason
// | sort by reason

//Black box monitoring for cluster
cluster("aks").database("AKSprod").BlackboxMonitoringActivity
| where PreciseTimeStamp > ago(12h) and underlayName == ""
| where reason != ""
| summarize count() by reason
| top 10 by count_ desc

// Find Errors reported by ARM Failed - Deleted - Created
cluster("ARMProd").database("ARMProd").EventServiceEntries
| where subscriptionId == "${subscription}"
| where resourceUri contains "${cluster}"
| where TIMESTAMP > ago(3d)
| where status == "Failed"
| project PreciseTimeStamp, correlationId , operationId, operationName, properties

// Get serviceRequestId of processes sent to cluster
cluster("ARMProd").database("ARMProd").HttpOutgoingRequests
| where httpMethod != "GET"
| where TIMESTAMP > ago(1d)
| where targetUri contains "${cluster}"// and targetUri contains "${subscription}"
| project TIMESTAMP, ActivityId, serviceRequestId , clientRequestId, failureCause, httpMethod , operationName, targetUri

// Use the activityID from the previous query.
cluster("Azcrp").database("crp_allprod").ContextActivity
| where TIMESTAMP between (datetime(2018-08-17T07:57Z)..datetime(2018-08-17T09:28Z))
| where subscriptionId == "${subscription}"
// | where activityId == "3817a3d4-7045-4db5-bc7f-45dbffe2166a"
// | where message contains "${cluster}"
// | where PreciseTimeStamp > ago(3d) // datetime(2018-07-31)
| project PreciseTimeStamp, activityId, traceLevel, message

// claims name shows WHO requested or performed the action
cluster("ARMProd").database("ARMProd").EventServiceEntries
| where subscriptionId == "${subscription}"
| where resourceUri contains "${cluster}"
// | where claims contains "1d78a85d-813d-46f0-b496-dd72f50a3ec0"
// | where ActivityId == "3817a3d4-7045-4db5-bc7f-45dbffe2166a"
// | where operationName contains "delete"
| where TIMESTAMP between (datetime(2018-08-17T07:57Z)..datetime(2018-08-17T09:28Z))
// | where claims contains "baead28c-2ce7-4550-83a5-5e6a2deb02b8"
// | where status == "Failed"
| project PreciseTimeStamp, claims, authorization, properties, resourceUri, operationName //, httpRequest, correlationId, operationId, Deployment, operationName
// | project PreciseTimeStamp, resourceUri , issuer, issuedAt

// Get the PUT operation. This query also shows the command used (aks get-credentials, browse, scale, show, create)
cluster("Armprod").database("ARMProd").HttpIncomingRequests
| where subscriptionId == "${subscription}"
| where targetUri contains "${cluster}"
// | where authorizationAction contains "write" or authorizationAction contains "delete"
| where commandName contains "aks" and httpMethod == "PUT"
| where PreciseTimeStamp > ago(3d)
| project TIMESTAMP,httpMethod,commandName,failureCause,serviceRequestId,authorizationAction,errorCode,errorMessage,subscriptionId,correlationId,targetUri

// Get the PUT operation
cluster("Armprod").database("ARMProd").HttpIncomingRequests
| where subscriptionId == "${subscription}"
| where targetUri contains "${cluster}" and authorizationAction contains "Clusters"
| where httpMethod == "PUT"
| where PreciseTimeStamp > ago(3d) // between (datetime(2018-07-16) .. datetime(2018-07-20))
| project TIMESTAMP, commandName , serviceRequestId , httpMethod , authorizationAction , operationName

//
cluster("Aks").database("AKSprod").FrontEndQoSEvents
| where subscriptionID contains "${subscription}"
| where resourceName contains "${cluster}"
// | where operationName !contains "delete"
| where PreciseTimeStamp > ago(3d)

// feature-gates will be broken when upgrading to 1.11.0+ Please code all cases against - 2835281 - for this issue (also added in the wiki)
// Chase has created doc for scenario where upgrade failing to 1.11 and nodes getting disappear. Here is the doc: https://www.csssupportwiki.com/index.php/curated:Azure/Virtual_Machine/Products/Azure_Kubernetes_Service/TSG/upgrade_to_1.11_NodesNotReady
// Customer can run this script on nodes which are missing
// az vm run-command invoke -g $MC_RG_NAME -n $NODE_NAME --command-id RunShellScript --scripts "sed -i "s/--feature-gates=Accelerators=true //" /etc/default/kubelet && systemctl daemon-reload && systemctl restart kubelet"
// But a roll is being fixed out , once fix has been rolled out they can retry the upgrade
cluster("Aks").database("AKSprod").AsyncQoSEvents
| where subscriptionID == "${subscription}"
| where TIMESTAMP > ago(3d)
| where suboperationName == "Upgrading" and propertiesBag contains "1.11"
| extend bag = parse_json(propertiesBag)
| extend from_version = tostring(bag.k8sCurrentVersion)
| extend to_version = tostring(bag.k8sGoalVersion)
| where from_version !contains "1.11" and to_version contains "1.11" and resultCode == "NodesNotReady"

//Black box monitoring for cluster
cluster("aks").database("AKSprod").BlackboxMonitoringActivity
| where PreciseTimeStamp > ago(1d)
| where fqdn contains "replacefqdn"
// | where ["state"] == "Unhealthy"
| summarize count(state) by bin(PreciseTimeStamp, 5min), state
| render timeline

//Black box monitoring for cluster
cluster("aks").database("AKSprod").BlackboxMonitoringActivity
| where PreciseTimeStamp > ago(1d)
| where fqdn contains "replacefqdn"
| where state != "Healthy"
| project PreciseTimeStamp, state, provisioningState, reason, agentNodeCount, msg, resourceGroupName, resourceName, underlayName
| order by PreciseTimeStamp asc
// | render timeline

cluster("Aks").database("AKSprod").AsyncQoSEvents
| sample 10
`;
}

/**
 * Adds the launcher container with one button per tool to the page and wires
 * up the click handlers.
 *
 * @param {{appLens: string, jarvis: string, asc: string}} urls - Tool URLs.
 * @param {string} kustoTemplate - Text copied by the Kusto button.
 * @param {string} cluster - Cluster name shown in the confirmation alert.
 */
function renderLauncher(urls, kustoTemplate, cluster) {
  const openAppLens = () => { window.open(urls.appLens, '_blank'); };
  const openJarvis = () => { window.open(urls.jarvis, '_blank'); };
  const openAsc = () => { window.open(urls.asc, '_blank'); };
  const copyKusto = () => {
    GM_setClipboard(kustoTemplate, 'text');
    alert(`Kusto queries for ${cluster} have been copied to clipboard!`);
  };
  const handlers = {
    appLensButton: openAppLens,
    jarvisButton: openJarvis,
    ascButton: openAsc,
    kustoButton: copyKusto,
    oneClickButton: () => {
      openAppLens();
      openJarvis();
      openAsc();
      copyKusto();
    },
  };

  // The callback can fire again when the case section is re-rendered; drop
  // the previous launcher so containers (and duplicate ids) do not pile up.
  const stale = document.getElementById('myContainer');
  if (stale !== null && stale.parentNode !== null) {
    stale.parentNode.removeChild(stale);
  }

  const container = document.createElement('div');
  container.setAttribute('id', 'myContainer');
  TOOL_BUTTONS.forEach((spec) => {
    const button = document.createElement('button');
    button.setAttribute('id', spec.id);
    button.setAttribute('type', 'button');
    button.textContent = spec.label;
    button.addEventListener('click', handlers[spec.id], false);
    container.appendChild(button);
  });
  document.body.appendChild(container);

  if (!stylesInjected) {
    GM_addStyle(CONTAINER_CSS);
    stylesInjected = true;
  }
}

/**
 * Callback for waitForKeyElements: reads the case section, derives the tool
 * URLs and Kusto queries from it, and renders the launcher buttons.
 *
 * Failures are reported with an alert from inside the callback. The callback
 * may be invoked later from a timer, where a try/catch placed around the
 * waitForKeyElements call would no longer be on the stack.
 */
function storeAndPrint() {
  try {
    const caseSection = document.getElementById('mc-case-section');
    if (caseSection === null) {
      throw new Error('The #mc-case-section element is not present on this page.');
    }
    const details = parseCaseText(caseSection.innerText);

    // The case id is taken from the second line of the #content text.
    const contentNode = document.getElementById('content');
    const contentLines = contentNode === null ? [] : contentNode.innerText.split('\n');
    const caseID = contentLines.length > 1 ? contentLines[1] : '';

    renderLauncher(buildToolUrls(details, caseID), buildKustoTemplate(details), details.cluster);
  } catch (err) {
    // Present the error as an alert to help with debugging any problems.
    alert(err.toString());
  }
}

// Only start when running in a browser on the case page; the typeof check
// lets the helpers above be loaded where there is no window object.
if (typeof window !== 'undefined' && CASE_URL_REGEX.test(window.location.href)) {
  try {
    waitForKeyElements('#mc-case-section', storeAndPrint);
  } catch (err) {
    alert(err.toString());
  }
}