Skip to content

Commit

Permalink
Misc
Browse files Browse the repository at this point in the history
  • Loading branch information
cmdcolin committed Dec 8, 2024
1 parent 567fd1d commit eb9d748
Show file tree
Hide file tree
Showing 4 changed files with 64 additions and 27 deletions.
44 changes: 37 additions & 7 deletions src/parseMetaString.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,41 @@
// constructed with the assistance of claude AI
/**
 * Splits a string into comma-separated fields, treating commas that appear
 * inside a double-quoted section or inside square brackets as literal text
 * rather than separators.
 *
 * Quotes and brackets are kept in the returned fields; each field is trimmed
 * of surrounding whitespace. Square brackets that occur inside a quoted
 * section are plain characters and do not open or close a bracketed section,
 * so a value such as `Description="see [ref"` cannot swallow the fields that
 * follow it.
 *
 * @param str - the text to split (e.g. `ID=DB,Description="a, b"`)
 * @returns the trimmed fields in order; a trailing empty field is omitted
 */
function customSplit(str: string): string[] {
  const result: string[] = []
  let current = ''
  let inQuotes = false
  let inBrackets = false

  for (const char of str) {
    if (char === '"') {
      inQuotes = !inQuotes
    } else if (!inQuotes && char === '[') {
      // brackets only delimit a list when they are not part of a quoted string
      inBrackets = true
    } else if (!inQuotes && char === ']') {
      inBrackets = false
    } else if (char === ',' && !inQuotes && !inBrackets) {
      // top-level comma: finish the current field and start a new one
      result.push(current.trim())
      current = ''
      continue
    }
    current += char
  }

  // flush whatever follows the last separator (nothing to flush if empty)
  if (current) {
    result.push(current.trim())
  }

  return result
}

export function parseMetaString(metaString: string) {
const inside = metaString.replace(/^<|>$/g, '')
return Object.fromEntries(
[
// split string on comma except when in square bracket
// https://stackoverflow.com/questions/74238461/
...metaString.replace(/^<|>$/g, '').matchAll(/(?:\[[^\][]*\]|[^,])+/g),
].map(f => {
const [key, val] = f[0].split('=').map(f => f.trim())
customSplit(inside).map(f => {
const [key, val] = f.split('=').map(f => f.trim())
console.log({ key, val })
if (val && val.startsWith('[') && val.endsWith(']')) {
return [
key,
Expand All @@ -17,7 +47,7 @@ export function parseMetaString(metaString: string) {
} else if (val && val.startsWith('"') && val.endsWith('"')) {
return [key, val.slice(1, -1)]
} else {
return [key, val]
return [key, val?.replaceAll(/^"|"$/g, '')]
}
}),
)
Expand Down
28 changes: 9 additions & 19 deletions test/__snapshots__/parse.test.ts.snap
Original file line number Diff line number Diff line change
Expand Up @@ -218,10 +218,9 @@ exports[`can get metadata from the header 1`] = `
"Type": "Integer",
},
"DB": {
"Description": ""dbSNP membership",
"Description": "dbSNP membership, build 129",
"Number": 0,
"Type": "Flag",
"build 129"": undefined,
},
"DBRIPID": {
"Description": "ID of this element in DBRIP",
Expand Down Expand Up @@ -460,10 +459,9 @@ exports[`can get metadata from the header 2`] = `
"Type": "Integer",
},
"DB": {
"Description": ""dbSNP membership",
"Description": "dbSNP membership, build 129",
"Number": 0,
"Type": "Flag",
"build 129"": undefined,
},
"DBRIPID": {
"Description": "ID of this element in DBRIP",
Expand Down Expand Up @@ -1346,16 +1344,14 @@ exports[`clinvar metadata 1`] = `
"Type": "Integer",
},
"CLNDISDB": {
"Description": ""Tag-value pairs of disease database name and identifier submitted for germline classifications",
"Description": "Tag-value pairs of disease database name and identifier submitted for germline classifications, e.g. OMIM:NNNNNN",
"Number": ".",
"Type": "String",
"e.g. OMIM:NNNNNN"": undefined,
},
"CLNDISDBINCL": {
"Description": ""For included Variant: Tag-value pairs of disease database name and identifier for germline classifications",
"Description": "For included Variant: Tag-value pairs of disease database name and identifier for germline classifications, e.g. OMIM:NNNNNN",
"Number": ".",
"Type": "String",
"e.g. OMIM:NNNNNN"": undefined,
},
"CLNDN": {
"Description": "ClinVar's preferred disease name for the concept specified by disease identifiers in CLNDISDB",
Expand All @@ -1368,11 +1364,9 @@ exports[`clinvar metadata 1`] = `
"Type": "String",
},
"CLNHGVS": {
"Description": ""Top-level (primary assembly",
"Description": "Top-level (primary assembly, alt, or patch) HGVS expression.",
"Number": ".",
"Type": "String",
"alt": undefined,
"or patch) HGVS expression."": undefined,
},
"CLNREVSTAT": {
"Description": "ClinVar review status of germline classification for the Variation ID",
Expand Down Expand Up @@ -1537,16 +1531,14 @@ exports[`clinvar metadata 1`] = `
"Type": "String",
},
"ONCDISDB": {
"Description": ""Tag-value pairs of disease database name and identifier submitted for oncogenicity classifications",
"Description": "Tag-value pairs of disease database name and identifier submitted for oncogenicity classifications, e.g. MedGen:NNNNNN",
"Number": ".",
"Type": "String",
"e.g. MedGen:NNNNNN"": undefined,
},
"ONCDISDBINCL": {
"Description": ""For included variant: Tag-value pairs of disease database name and identifier for oncogenicity classifications",
"Description": "For included variant: Tag-value pairs of disease database name and identifier for oncogenicity classifications, e.g. OMIM:NNNNNN",
"Number": ".",
"Type": "String",
"e.g. OMIM:NNNNNN"": undefined,
},
"ONCDN": {
"Description": "ClinVar's preferred disease name for the concept specified by disease identifiers in ONCDISDB",
Expand Down Expand Up @@ -1594,16 +1586,14 @@ exports[`clinvar metadata 1`] = `
"Type": "String",
},
"SCIDISDB": {
"Description": ""Tag-value pairs of disease database name and identifier submitted for somatic clinial impact classifications",
"Description": "Tag-value pairs of disease database name and identifier submitted for somatic clinial impact classifications, e.g. MedGen:NNNNNN",
"Number": ".",
"Type": "String",
"e.g. MedGen:NNNNNN"": undefined,
},
"SCIDISDBINCL": {
"Description": ""For included variant: Tag-value pairs of disease database name and identifier for somatic clinical impact classifications",
"Description": "For included variant: Tag-value pairs of disease database name and identifier for somatic clinical impact classifications, e.g. OMIM:NNNNNN",
"Number": ".",
"Type": "String",
"e.g. OMIM:NNNNNN"": undefined,
},
"SCIDN": {
"Description": "ClinVar's preferred disease name for the concept specified by disease identifiers in SCIDISDB",
Expand Down
9 changes: 9 additions & 0 deletions test/__snapshots__/parseMetaString.test.ts.snap
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,12 @@ exports[`m1 1`] = `
],
}
`;

exports[`m2 1`] = `
{
"Description": "dbSNP membership, build 129",
"ID": "DB",
"Number": "0",
"Type": "Flag",
}
`;
10 changes: 9 additions & 1 deletion test/parseMetaString.test.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { expect, test } from 'vitest'
import { parseMetaString } from '../src/parseMetaString'

test('m1', () => {
test('array in values', () => {
const result1 = parseMetaString(
'<ID=Assay,Type=String,Number=.,Values=[WholeGenome, Exome]>',
)
Expand All @@ -11,3 +11,11 @@ test('m1', () => {
expect(result1).toEqual(result2)
expect(result1).toMatchSnapshot()

Check failure on line 12 in test/parseMetaString.test.ts

View workflow job for this annotation

GitHub Actions / Lint, build, and test on node 20.x and ubuntu-latest

test/parseMetaString.test.ts > array in values

Error: Snapshot `array in values 1` mismatched ❯ test/parseMetaString.test.ts:12:19
})

test('quoted string with comma in description', () => {
expect(
parseMetaString(
'<ID=DB,Number=0,Type=Flag,Description="dbSNP membership, build 129">',
),
).toMatchSnapshot()

Check failure on line 20 in test/parseMetaString.test.ts

View workflow job for this annotation

GitHub Actions / Lint, build, and test on node 20.x and ubuntu-latest

test/parseMetaString.test.ts > quoted string with comma in description

Error: Snapshot `quoted string with comma in description 1` mismatched ❯ test/parseMetaString.test.ts:20:5
})

0 comments on commit eb9d748

Please sign in to comment.