Move to incidents subfolder

This commit is contained in:
2024-07-30 08:54:42 -04:00
parent a26bb2a1a4
commit 1ddc9f19f1

View File

@@ -0,0 +1,328 @@
#+TITLE: Incident 2590
#+AUTHOR: Adam Mohammed
#+DATE: May 2, 2024
* Starting out
There are servers missing their ProcessorComponent (CPU) information.
** Get a list of affected hardware
#+BEGIN_SRC ruby
# Collect every server that has no component of type "ProcessorComponent".
affected_servers = []
Hardware::Server.find_each do |server|
  has_cpu = server.components.any? { |component| component.type == "ProcessorComponent" }
  affected_servers << server unless has_cpu
end
#+END_SRC
#+DATE:
#+BEGIN_EXAMPLE
1685 total affected
#+END_EXAMPLE
** Classify the affected hardware by class and plan
#+BEGIN_SRC ruby
# Count the affected servers per class.
affected_server_types = affected_servers.each_with_object(Hash.new(0)) do |server, counts|
  counts[server.class] += 1
end
#+END_SRC
#+BEGIN_EXAMPLE ruby
irb(main):269:0> affected_server_types
=> {"Hardware::StorageAppliance"=>170, "Hardware::Open19Node"=>195, "Hardware::Server"=>1319, "Hardware::StorageServer"=>1}
#+END_EXAMPLE
#+BEGIN_SRC ruby
# Count the affected servers per plan slug, ignoring servers without a plan.
affected_plan_types = Hash.new(0)
affected_servers.each do |server|
  plan = server.plan
  affected_plan_types[plan.slug.to_s] += 1 if plan.present?
end; nil
#+END_SRC
#+BEGIN_EXAMPLE ruby
{"storage.custom"=>102,
"m3.large.x86"=>329,
"c3.small.x86"=>120,
"m3.small.x86"=>143,
"n2.xlarge.x86"=>23,
"c2.medium.x86"=>124,
"c3.medium.x86"=>396,
"netapp.storage"=>16,
"m2.xlarge.x86"=>31,
"nvidia3.a100.medium"=>1,
"t3.small.x86"=>13,
"n3.xlarge.x86"=>155,
"w3amd.75xx24c.512.8160"=>102,
"s3.xlarge.x86"=>29,
"appliance.dell.hci.vxrail.opt-m.x86"=>12,
"m3.large.opt-c2"=>3,
"nvidia3.a30.medium"=>11,
"purestorage"=>6,
"a3.large.opt-s4a5n1.x86"=>17,
"nvidia3.a30.large"=>3,
"n3.xlarge.opt-m4"=>4,
"storage.dell"=>14,
"nvidia3.a40.medium"=>9,
"w3amd.7402p.256.8160"=>1,
"a4.lg"=>5,
"a3.large.x86"=>1,
"x.large.arm"=>1,
"w3amd.75xx24c.256.4320"=>1,
"npi.testing"=>1,
"m3.large.opt-c2m4"=>1,
"a3.large.opt-s4a1"=>1,
"w3amd.75xx24c.256.8160"=>1,
"c3.large.arm64"=>2}
#+END_EXAMPLE
** What hardware is missing plan information?
#+BEGIN_SRC ruby
# Affected servers that have no plan attached.
missing_plan = affected_servers.reject { |server| server.plan.present? }; nil
#+END_SRC
#+BEGIN_EXAMPLE ruby
irb(main):289:0> missing_plan.pluck(:id, :type, :state)
=>
[["2556229f-3da0-4056-96dc-ce820af30ba3", "Hardware::Server", "enrolled"],
["4ca367f4-33c2-494f-8227-bed6c0d8bd8d", "Hardware::Server", "enrolled"],
["8504ffdf-24d7-453f-9a49-94a7cba3f9ae", "Hardware::StorageAppliance", "enrolled"],
["8b383a51-2a45-4d02-aafa-f31b159e31b6", "Hardware::Server", "enrolled"],
["a20a6442-7185-4c49-bfcf-5359fe22cd9f", "Hardware::StorageAppliance", "enrolled"],
["e2ff6fec-a70a-42e6-afb1-93f57c6a30f1", "Hardware::Server", "enrolled"],
["f9670617-0cde-4db6-94de-d7ec495881e7", "Hardware::StorageAppliance", "enrolled"]]
#+END_EXAMPLE
I think it's safe to not worry about these because customers can't deploy them yet.
** What hardware plan versions don't have the required CPU information?
#+BEGIN_SRC ruby
# Reports whether the first "cpus" entry in the hardware's plan version specs
# has both a "cores" and a "count" key.
#
# Returns false when the plan version or its "cpus" spec is not present.
def valid_cpu_data?(hardware)
  plan_version = hardware.plan_version
  return false unless plan_version.present? && plan_version.specs["cpus"].present?

  first_cpu = plan_version.specs["cpus"][0]
  %w[cores count].all? { |key| first_cpu.key?(key) }
end
# Count, per plan version slug, the affected servers whose plan version lacks
# the required CPU data. Servers without a plan version are skipped.
affected_plan_versions = Hash.new(0)
affected_servers.each do |server|
  version = server.plan_version
  next unless version.present?
  next if valid_cpu_data?(server)

  affected_plan_versions[version.slug] += 1
end; nil
#+END_SRC
** These are the ones that currently are not being billed properly
#+BEGIN_SRC ruby
# UUIDs of the entries that are currently not being billed properly.
broken_billables = %w[
  39b7f377-af6d-437b-a99b-10d9d4fd7b53
  d2deb4c8-446f-4679-a7f5-60edf7745e23
  e9c50e27-9f74-477b-9210-0e277537a336
  88a6bf4a-b63e-4c7e-8c20-5d5949ba62f9
  5b205c53-af64-421e-b2b6-39f5923d4f3f
  604c38d9-1f8c-4600-bb29-a0d5e1aa504a
  d4914c80-c657-4ff2-86a1-8f41d90af0a9
  f6f087f3-3e7c-457f-8943-a6864a8a0b97
  88d2e8ee-6ec1-450b-9982-63d8220a1011
  a47f38f9-c2ac-46ba-bb16-68e659b89183
  e47a3d2e-13a0-444a-8164-ebe54fbc43b1
  840ce4fd-a300-4a7b-96a3-140e0bf988b4
  68e0feb1-8146-4b08-a591-15806a0f61a0
  0e1ef1c6-2de7-40b3-91e0-44474f32fafb
  161d4f10-4362-4028-b237-b7649f87eb09
]
#+END_SRC
** Do these pieces of hardware have the information I need to fix the data?
#+BEGIN_SRC ruby
# Reports whether the hardware's plan version has what is needed to rebuild
# its CPU component: a first "cpus" spec entry with "cores" and "count" keys.
#
# Returns false when the plan version or its "cpus" spec is not present.
def my_valid_cpu_data?(hardware)
  version = hardware.plan_version
  return false unless version.present?

  cpus = version.specs["cpus"]
  return false unless cpus.present?

  # Nothing is left over when every required key exists on the first entry.
  (%w[cores count] - cpus[0].keys).empty?
end
# Broken hardware whose plan version carries enough CPU data to be repaired.
can_be_fixed = broken_hardware.select do |hardware|
  hardware.plan_version.present? && my_valid_cpu_data?(hardware)
end; nil
#+END_SRC
** Actually fix the components
#+BEGIN_SRC ruby
# Builds (without saving) a ProcessorComponent for the given hardware id from
# one CPU spec hash.
#
# h_id     - id assigned to the component's hardware_id
# cpu_data - hash read for "name", "manufacturer", "model", "speed", "cores"
# index    - number used to build the placeholder serial ("CPU<index>")
#
# Returns the new ProcessorComponent.
def create_processor_component(h_id, cpu_data, index)
  ProcessorComponent.new.tap do |component|
    component.name = cpu_data["name"]
    component.type = ProcessorComponent.to_s
    component.vendor = cpu_data["manufacturer"]
    component.model = cpu_data["model"]
    component.serial = "CPU#{index}"
    component.firmware_version = "N/A"
    component.data = { "clock" => cpu_data["speed"], "cores" => cpu_data["cores"] }
    component.hardware_id = h_id
  end
end
#+END_SRC
#+BEGIN_SRC ruby
# NOTE(review): unfinished draft — the loop body breaks off at the incomplete
# create_processor_component call on the last line and the block is never
# closed, so this snippet does not parse as written.
cant_fix = []
finished = []
broken_hardware.each_with_index do |h, i|
# Set aside hardware that has no plan version or no "cpus" entry to read from.
# NOTE(review): the other snippets in this file read plan_version.specs["cpus"];
# confirm whether indexing plan_version directly is intended here.
unless h.plan_version.present? && h.plan_version["cpus"].present?
cant_fix << h
next
end
cpu_data = h.plan_version["cpus"][0]
# NOTE(review): core_count is assigned the hardware record itself — looks like a placeholder.
core_count = h
c = create_processor_component(h.id, ,
#+END_SRC
#+BEGIN_SRC ruby
"04af7a5f-6330-4095-b525-ea8a596db035"
"111fc3d1-7002-4c22-9d29-e2539c610bb1"
"15a4071c-ddd9-4fc5-b9b9-35d5831a9de3"
"19798268-39ca-454e-a7de-cab1a9cae4a5"
"1df18ad3-3189-4b87-9654-7d9b062d553d"
"20388df4-c645-445c-8563-114213c85604"
"2cafd1cc-a6ba-4caf-849d-969ac22eddca"
"2cc2596e-8045-49ea-8274-5b84e27a643c"
"2d4941a3-f0ce-454c-b9dc-6f5bf3381519"
"2e13125c-9794-4392-ab7c-0dbb10b3b4f7"
"2e24c7dc-a219-45c2-ae79-1aa0eb367d56"
"2ffa9123-6466-49a3-ac81-84a7e0dcb437"
"35d423fa-e119-4c9b-8eed-9193a4037b18"
"39888ace-88cb-49f8-8eef-f1ec14c36d2c"
"4470e1bc-0c1e-47ac-99e0-8f23cc075228"
"56c91002-4e8e-4ab8-b653-d8fb459ad186"
"59daefde-f2c2-42c2-8bc9-90d5a00e98e9"
"5bf121bf-1b11-429b-9f73-11206e9f438c"
"5f81d1f6-9c7d-41b0-bb02-a4cb5b31b1ab"
"613d4464-8c0b-44a8-8bcc-9ece50b17ce5"
"62e344ed-2fe1-4778-92e8-0dd386cf0590"
"630cf74d-d689-496c-b29f-5f094c4455d5"
"649fa2b1-675c-4433-9256-e7632092ab8a"
"66f1ef27-3310-40c3-8d06-6c889ddc1e15"
"6ac54a10-c47d-446d-8ef5-d4131bdc746c"
"6c7e5828-68fe-4114-a8e8-1e3ce9747de0"
"773240e6-7f9b-472f-847e-0a9f914e4493"
"77f9ba1e-bcd4-46c9-963a-b861fb573ab2"
"7fe941fd-4533-411e-93ad-832632910cf2"
"858a0e53-56ec-4b77-b852-8371f3ead1bd"
"85b1ab1b-664b-4d0f-855e-30ccf7f16f50"
"921d04e8-b7b8-4e13-a9f3-f55302d970c1"
"9430eb5e-fbe2-48b0-b180-d94347a5f296"
"a172606f-4d90-41b8-a1f1-0cd1b20aaa7f"
"a8f5d150-0f5b-4a92-9583-0e70473a9b8b"
"a96d685d-b16f-4852-84dd-dd3304b37471"
"aa1f836f-5808-452a-a5bc-884acd3bcd90"
"abc678fd-d92b-4fc9-ad46-bc6316c170c6"
"afce0857-1016-4638-91bf-f67ee9ade423"
"b37bae11-645f-45cf-b55e-20604b5f3030"
"d263768d-c460-4bdd-81fa-c04fe80122cc"
"d4849d7e-8b68-4f14-97f6-0682c20d4706"
"d634a3b5-98ef-4eca-8fe3-3bc4903170c9"
"dff6b6b3-d46e-47c8-8c85-e85f2566893b"
"e248f2f0-b1dc-4e6e-b025-687ea375fe2d"
"e9e17d57-f8dd-4f8a-b31b-6e33c8e25078"
"fa97834e-d71e-4d8f-8fc0-2e8988a05a28"
"fb6c21a5-8640-4e1e-af18-2790f3a79873"
#+END_SRC
1. LicenseActivationID
2. Licensable (an Instance Model)
3. PlanVersion
4. CPU count and CPU cores
5. Update License.data["cores"] = cpu_count * cpu_cores
#+BEGIN_SRC ruby
# Recomputes the core count on the license behind a license activation as
# CPU count * cores per CPU, read from the first "cpus" entry of the
# licensable's plan version specs.
#
# This doesn't save anything: the license is only changed in memory.
#
# @param license_activation [LicenseActivation]
# @return [License, String] the updated (unsaved) license, or a string naming
#   the piece of data that was missing
def fix_core_count_prime(license_activation)
  instance = license_activation.licensable
  return "missing instance" unless instance.present?

  plan_version = instance.plan_version
  return "missing plan_version" unless plan_version.present?

  # dig yields nil instead of raising when specs has no (or an empty) "cpus"
  # entry, so that case is reported like any other missing CPU data.
  cpu_data = plan_version.specs&.dig("cpus", 0)
  return "missing cpu_data" unless cpu_data.present? && cpu_data["cores"] && cpu_data["count"]

  license = license_activation.license
  license.data["cores"] = cpu_data["count"].to_i * cpu_data["cores"].to_i
  license
end
# Run the fix for each license activation id (the first two entries are
# skipped), collecting either the updated License or a string describing
# what went wrong for that id.
res = broken_license_activations[2..].map do |la_id|
  # find_by returns nil for an unknown id (find would raise instead), and
  # next makes the message this item's result so the remaining ids still run.
  la = LicenseActivation.with_deleted.find_by(id: la_id)
  next "couldn't find la #{la_id}" unless la.present?

  fix_core_count_prime(la)
end
# when I wanted to get the successful ones
res.filter { |item| item.is_a? License }
# when I wanted to see what broke
res.filter { |item| !item.is_a? License }
#+END_SRC
** Are there any Windows licenses remaining with 0 cores that aren't erroring yet?
#+BEGIN_SRC ruby
# Load every license activation whose licensee product slug contains "windows".
activations = LicenseActivation
  .eager_load(:license)
  .eager_load(:licensee_product)
  .where("licensee_products.slug LIKE '%windows%'")
  .all
# Keep the activations whose license still records 0 cores.
missing_cores = activations.select { |la| la.license.data["cores"] == 0 }
# Recompute the core count for each of them (nothing is saved here).
fixed_licenses = missing_cores.map { |la| fix_core_count_prime(la) }
#+END_SRC
#+BEGIN_EXAMPLE ruby
irb(main):066:0> LicenseActivation.eager_load(:license).eager_load(:licensee_product).where("licensee_products.slug LIKE '%windows%'").where("(licenses.data->>'cores')::integer = 0").count
=> 0
irb(main):067:0> LicenseActivation.eager_load(:license).eager_load(:licensee_product).where("licensee_products.slug LIKE '%windows%'").where("(licenses.data->>'cores')::integer > 0").count
=> 538
#+END_EXAMPLE