Wednesday, June 20, 2007

Getting All Subsets of a Set

For example, let us generate all subsets of a set with 10 different elements.

-- you will need two auxiliary tables
SELECT * INTO dbo.Numbers FROM AnotherDB.dbo.Numbers WHERE Number BETWEEN 1 AND 1024
GO
CREATE UNIQUE CLUSTERED INDEX abcd ON Numbers(Number)
GO
-- another auxiliary table
CREATE TABLE NumbersIn2Power(Number INT NOT NULL, Power2 INT NOT NULL)
INSERT NumbersIn2Power VALUES(1,1)
INSERT NumbersIn2Power VALUES(2,2)
INSERT NumbersIn2Power VALUES(3,4)
INSERT NumbersIn2Power VALUES(4,8)
INSERT NumbersIn2Power VALUES(5,16)
INSERT NumbersIn2Power VALUES(6,32)
INSERT NumbersIn2Power VALUES(7,64)
INSERT NumbersIn2Power VALUES(8,128)
INSERT NumbersIn2Power VALUES(9,256)
INSERT NumbersIn2Power VALUES(10,512)
GO
CREATE UNIQUE CLUSTERED INDEX abcdef ON NumbersIn2Power(Number)
GO

-- the set itself has 10 elements

CREATE TABLE #t(itemNum INT, val INT)
INSERT #t VALUES(1,11)
INSERT #t VALUES(2,12)
INSERT #t VALUES(3,13)
INSERT #t VALUES(4,14)
INSERT #t VALUES(5,15)
INSERT #t VALUES(6,16)
INSERT #t VALUES(7,17)
INSERT #t VALUES(8,18)
INSERT #t VALUES(9,19)
INSERT #t VALUES(10,20)
GO

SET STATISTICS TIME ON
SET STATISTICS IO ON
GO

-- approach 1, using a cross join
SELECT n.number AS SetNumber, #t.val FROM NumbersIn2Power n2
JOIN dbo.Numbers n ON n2.Power2 & n.number > 0
JOIN #t ON n2.number = #t.itemNum
ORDER BY n.number, #t.itemNum

/*
Table 'Numbers'. Scan count 1, logical reads 40, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
Table 'NumbersIn2Power'. Scan count 0, logical reads 20, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
Table '#t__________________________________________________________________________________________________________________000000000031'. Scan count 1, logical reads 1, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.

SQL Server Execution Times:
CPU time = 0 ms, elapsed time = 71 ms.

*/

-- approach two, using recursion

WITH Subsets AS (
SELECT itemNum, CAST(',' + CAST(itemNum AS VARCHAR(10)) + ',' AS
CHAR(50)) AS LinesList, CAST(1 AS INT) AS Iteration, val
FROM #t
UNION ALL
SELECT t.itemNum, CAST(CAST(RTRIM(s.LinesList) AS VARCHAR(200)) +
CAST(t.itemNum AS VARCHAR(10)) + ',' AS CHAR(50)) AS LinesList,
s.Iteration + 1 AS Iteration, s.val + t.val AS val
FROM Subsets s JOIN #t t ON s.itemNum >= t.itemNum
WHERE s.Iteration <> t.itemNum
)
SELECT * FROM Subsets ORDER BY LinesList

/*
Table 'Worktable'. Scan count 2, logical reads 6121, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
Table '#t__________________________________________________________________________________________________________________000000000031'. Scan count 2, logical reads 1023, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.

SQL Server Execution Times:
CPU time = 32 ms, elapsed time = 87 ms.
*/

As you have seen, elapsed time is similar, but the first approach uses less CPU. Note that I ran each script more than once, so that parse/compile time is not included in the costs.

0 Comments:

Post a Comment

<< Home